Hi there,
It seems I'm not the first one to run into this issue: I can't get Docker metrics through Alloy.
My setup uses two Alloy instances: one scrapes the host metrics, the host's Docker metrics, and the logs, then ships everything over OTLP; the other receives that data and forwards it to the Mimir/Loki server.
Any clue, dear community?
Here's the config of the first (host) instance:

logging {
  level = "info"
}

livedebugging {
  enabled = true
}

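/******* CADVISOR (DOCKER) METRICS *******/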
prometheus.exporter.cadvisor "cadvisor" {
  store_container_labels = true
  docker_host = "unix:///rootfs/var/run/docker.sock"
  enabled_metrics = [
    "cpu", "sched", "percpu", "memory", "memory_numa", "cpuLoad", "diskIO", "disk",
    "network", "tcp", "advtcp", "udp", "app", "process", "hugetlb", "perf_event",
    "referenced_memory", "cpu_topology", "resctrl", "cpuset", "oom_event",
  ]
}

prometheus.scrape "scrape_cadvisor" {
  targets = prometheus.exporter.cadvisor.cadvisor.targets
  job_name = "host_cadvisor"
  forward_to = [prometheus.relabel.job_relabel.receiver]
}

/******* NODE_EXPORTER METRICS *******/
prometheus.exporter.unix "node_exporter" {
  rootfs_path = "/rootfs"
  procfs_path = "/rootfs/proc"
  sysfs_path = "/rootfs/sys"
}

prometheus.scrape "scrape_node_exporter" {
  targets = prometheus.exporter.unix.node_exporter.targets
  job_name = "host_metrics"
  honor_labels = true
  forward_to = [prometheus.relabel.job_relabel.receiver]
}

prometheus.relabel "job_relabel" {
  forward_to = [otelcol.receiver.prometheus.default.receiver]

  rule {
    target_label = "job"
    replacement = "sdc_monitoring_host"
  }

  rule {
    target_label = "group"
    replacement = "host"
  }
}

/******* LOGS *******/
local.file_match "collect_logs" {
  path_targets = [
    {"__path__" = "/rootfs/var/log/*.log"},
    {"__path__" = "/rootfs/var/log/**/*.log"},
    {"__path__" = "/rootfs/var/lib/docker/containers/**/*.log"},
  ]
}

loki.source.file "scrape_logs" {
  targets = local.file_match.collect_logs.targets
  forward_to = [loki.process.add_label.receiver]
}

loki.process "add_label" {
  forward_to = [otelcol.receiver.loki.loki_tosend.receiver]

  stage.labels {
    values = {
      group = "host",
    }
  }
}

/******* DOCKER LOGS *******/
discovery.docker "host" {
  host = "unix:///rootfs/var/run/docker.sock"
}

discovery.relabel "logs_integrations_docker" {
  targets = []

  rule {
    target_label = "job"
    replacement = "sdc_monitoring_host"
  }

  rule {
    target_label = "instance"
    replacement = constants.hostname
  }

  rule {
    source_labels = ["__meta_docker_container_name"]
    regex = "/(.*)"
    target_label = "container"
  }

  rule {
    source_labels = ["__meta_docker_container_log_stream"]
    target_label = "stream"
  }
}

loki.source.docker "default" {
  host = "unix:///rootfs/var/run/docker.sock"
  targets = discovery.docker.host.targets
  labels = {"source" = "docker", "group" = "host"}
  relabel_rules = discovery.relabel.logs_integrations_docker.rules
  forward_to = [loki.process.docker.receiver]
  refresh_interval = "5s"
}

loki.process "docker" {
  forward_to = [otelcol.receiver.loki.loki_tosend.receiver]

  stage.docker {}
}

/******* OTELCOL RECEIVER *******/
otelcol.receiver.prometheus "default" {
  output {
    metrics = [otelcol.processor.memory_limiter.default.input]
  }
}

otelcol.receiver.loki "loki_tosend" {
  output {
    logs = [otelcol.processor.memory_limiter.default.input]
  }
}

/******* PROCESSORS *******/
otelcol.processor.memory_limiter "default" {
  check_interval = "10s"
  limit = "10000MiB"

  output {
    metrics = [otelcol.processor.batch.default.input]
    logs = [otelcol.processor.batch.default.input]
    traces = [otelcol.processor.batch.default.input]
  }
}

otelcol.processor.batch "default" {
  send_batch_size = 10000

  output {
    metrics = [otelcol.exporter.otlp.export_to_monitoring.input]
    logs = [otelcol.exporter.otlp.export_to_monitoring.input]
    traces = [otelcol.exporter.otlp.export_to_monitoring.input]
  }
}

/******* EXPORT *******/
otelcol.exporter.otlp "export_to_monitoring" {
  client {
    endpoint = sys.env("ALLOY_HOST") + ":4317"

    tls {
      insecure_skip_verify = false
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
    }
  }
}

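For reference, this is a minimal sketch of what I would use to check whether the cAdvisor exporter emits container metrics at all, with the OTLP conversion taken out of the picture. The "debug" labels and the remote_write URL are placeholders and not part of my actual setup:

// Hypothetical debugging pipeline: scrape cAdvisor and push straight to a remote_write endpoint.
prometheus.exporter.cadvisor "debug" {
  docker_host = "unix:///rootfs/var/run/docker.sock"
}

prometheus.scrape "debug" {
  targets = prometheus.exporter.cadvisor.debug.targets
  forward_to = [prometheus.remote_write.debug.receiver]
}

prometheus.remote_write "debug" {
  endpoint {
    // Placeholder URL, replace with a reachable Prometheus-compatible endpoint.
    url = "https://mimir.example:9009/api/v1/push"
  }
}
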
That was the host instance. Here's the config of the second (receiver) instance, which ingests everything over OTLP and forwards it to Mimir/Loki/Tempo:

otelcol.receiver.otlp "ingest" {
  grpc {
    endpoint = "0.0.0.0:4317"

    tls {
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
      insecure_skip_verify = true
    }
  }

  http {
    endpoint = "0.0.0.0:4318"

    tls {
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
      insecure_skip_verify = true
    }
  }

  output {
    metrics = [otelcol.processor.memory_limiter.default.input]
    logs = [otelcol.processor.memory_limiter.default.input]
    traces = [otelcol.processor.memory_limiter.default.input]
  }
}

otelcol.receiver.prometheus "prom_ingest" {
  output {
    metrics = [otelcol.processor.memory_limiter.default.input]
    logs = [otelcol.processor.memory_limiter.default.input]
    traces = [otelcol.processor.memory_limiter.default.input]
  }
}

// Scraping our own metrics for self-monitoring
// Scrape Tempo, Mimir, Loki, Alloy (and Grafana)
prometheus.scrape "sdc_monitoring_infra" {
  // The targets array specifies which services to scrape.
  // Define the address to scrape, and add a 'group' and 'service' label for each target.
  scheme = "https"
  targets = [
    {"__address__" = sys.env("MIMIR_HOST") + ":9009", group = "infrastructure", service = "mimir"},
    {"__address__" = sys.env("TEMPO_HOST") + ":3200", group = "infrastructure", service = "tempo"},
    {"__address__" = sys.env("LOKI_HOST") + ":3100", group = "infrastructure", service = "loki"},
    {"__address__" = "localhost:12345", group = "infrastructure", service = "alloy"},
    // {"__address__" = sys.env("GRAFANA_HOST") + ":" + sys.env("GRAFANA_PORT"), group = "infrastructure", service = "grafana"},
  ]

  // The job name to add to the scraped metrics.
  job_name = "sdc_monitoring_infra"

  tls_config {
    cert_file = "/etc/certs/monitoring.crt"
    key_file = "/etc/certs/monitoring.key"
    ca_file = "/etc/certs/monitoring.crt"
    insecure_skip_verify = true
  }

  // Scrape all of these services every 15 seconds.
  scrape_interval = "15s"

  // Send the metrics to the Prometheus receiver, which converts them for export to Mimir.
  forward_to = [otelcol.receiver.prometheus.prom_ingest.receiver]
}

// Add a relabeling rule to make sure the Docker metrics are correctly identified
prometheus.relabel "docker_metrics" {
forward_to = [otelcol.processor.memory_limiter.default.input]
rule {
source_labels = ["job"]
regex = "sdc_monitoring_host"
action = "keep"
}
rule {
target_label = "source"
replacement = "docker_host"
}
}
otelcol.processor.memory_limiter "default" {
  check_interval = "1s"
  limit = "400MiB"

  output {
    metrics = [otelcol.processor.batch.default.input]
    logs = [otelcol.processor.batch.default.input]
    traces = [otelcol.processor.batch.default.input]
  }
}

otelcol.processor.batch "default" {
  timeout = "10s"
  send_batch_size = 10000

  output {
    metrics = [otelcol.exporter.prometheus.default.input]
    logs = [otelcol.exporter.loki.default.input]
    traces = [otelcol.exporter.otlp.default.input]
  }
}

// Exports
// Loki export
otelcol.exporter.loki "default" {
  forward_to = [loki.write.default.receiver]
}

loki.write "default" {
  endpoint {
    url = "https://" + sys.env("LOKI_HOST") + ":3100/loki/api/v1/push"

    tls_config {
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
      insecure_skip_verify = true
    }
  }
}

// Tempo export
otelcol.exporter.otlp "default" {
  client {
    endpoint = sys.env("TEMPO_HOST") + ":4317"

    tls {
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
      insecure_skip_verify = true
    }
  }
}

// Mimir export
otelcol.exporter.prometheus "default" {
  forward_to = [prometheus.remote_write.default.receiver]
}

prometheus.remote_write "default" {
  endpoint {
    url = "https://" + sys.env("MIMIR_HOST") + ":9009/api/v1/push"

    tls_config {
      cert_file = "/etc/certs/monitoring.crt"
      key_file = "/etc/certs/monitoring.key"
      ca_file = "/etc/certs/monitoring.crt"
      insecure_skip_verify = true
    }
  }
}

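Side note: to check whether anything arrives over OTLP on the receiver at all, I could temporarily point the batch processor's output at a debug exporter instead of the real exports. This assumes an Alloy version that ships otelcol.exporter.debug, and the "inspect" label is just a placeholder:

// Hypothetical debug exporter: dumps whatever it receives into the Alloy log output.
otelcol.exporter.debug "inspect" {
  verbosity = "detailed"
}

Pointing the batch processor's metrics output at otelcol.exporter.debug.inspect.input should then show in the logs whether the cAdvisor series make it this far.
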
Finally, here's the docker-compose service for the host Alloy instance:

alloy_host:
  container_name: ${PROJECT_NAME}-alloy-host
  hostname: ${PROJECT_NAME}-alloy-host
  image: grafana/alloy:latest
  # command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config_host.alloy
  command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config_host.alloy --stability.level=experimental
  environment:
    ALLOY_HOST: ${ALLOY_HOST}
  volumes:
    - ../host/alloy:/etc/alloy
    # cAdvisor mounts
    - /:/rootfs
    - /var/run:/var/run:ro
    - /sys:/rootfs/sys:ro
    - /proc:/rootfs/proc:ro
    - /var/lib/docker/:/var/lib/docker:ro
    - /var/run/docker.sock:/rootfs/var/run/docker.sock
    - /dev/disk/:/dev/disk:ro
    # - /var/run/docker/metrics.sock:/host/var/run/docker/metrics.sock:rw
    - ${CERTIFICATE_DIRECTORY}:/etc/certs
  networks:
    - internal
  privileged: true

volumes:
  alloy-data:

networks:
  internal:
    name: ${PROJECT_NAME}-internal-network-${ENV}
    external: true