I’m trying to get container metrics with prometheus.exporter.cadvisor
but there are permission issues I’d like to understand. Specifically, this
journal entry:
Jul 28 13:00:18 relayplan-core-prd alloy[107752]: ts=2025-07-28T13:00:18.936333402Z level=error msg="Failed to create existing container: /system.slice/docker-a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd.scope: failed to identify the read-write layer ID for container \"a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd\". - open /var/lib/docker/image/overlay2/layerdb/mounts/a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd/mount-id: permission denied" component_path=/ component_id=prometheus.exporter.cadvisor.docker func=Errorf
Alloy is installed directly on the Ubuntu host via the official repos and uses the
provided alloy.service unit file. It runs as the alloy user, so I’ve tried adding
that user to the docker group, but the error remains.
That said, there is no problem scraping logs for Loki with discovery.docker,
or scraping system metrics with prometheus.exporter.unix.
Here’s my full config:
// Verbosity of Alloy's own internal log output (not the collected logs).
logging {
level = "info"
}
// TARGETS
// Remote-write receiver: every scraped metric sample is pushed to this
// Prometheus endpoint.
prometheus.remote_write "default" {
endpoint {
url = "http://10.0.0.5:9090/api/v1/write"
}
}
// Loki push receiver: all collected log entries are shipped to this endpoint.
// The previous `external_labels = {}` was removed: an empty map is the
// default, so the attribute was a no-op.
loki.write "default" {
  endpoint {
    url = "http://10.0.0.5:3100/loki/api/v1/push"
  }
}
// SYSTEM METRICS
// Host-level metrics (node-exporter equivalent). Works as the unprivileged
// alloy user because it only reads /proc and /sys.
prometheus.exporter.unix "system" {
// Skip collectors for subsystems not present / not useful on this host.
disable_collectors = ["ipvs", "btrfs", "infiniband", "xfs", "zfs"]
enable_collectors = ["meminfo"]
filesystem {
// Exclude pseudo/virtual filesystems so only real mounts are reported.
fs_types_exclude = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
// Exclude system mount points and Docker's per-container mounts.
mount_points_exclude = "^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+)($|/)"
mount_timeout = "5s"
}
// Ignore virtual/ephemeral interfaces (veth pairs, Calico, container ifaces).
netclass {
ignored_devices = "^(veth.*|cali.*|[a-f0-9]{15})$"
}
netdev {
device_exclude = "^(veth.*|cali.*|[a-f0-9]{15})$"
}
}
// Decorate the node-exporter targets with a stable `instance` label and a
// per-host `job` name before they are scraped.
discovery.relabel "system" {
  targets = prometheus.exporter.unix.system.targets

  // instance = this host's name (instead of the default host:port pair).
  rule {
    replacement  = constants.hostname
    target_label = "instance"
  }

  // job = "<hostname>-metrics".
  rule {
    replacement  = string.format("%s-metrics", constants.hostname)
    target_label = "job"
  }
}
// Scrape the relabelled host-metrics targets every 15s and push the samples
// to the remote_write receiver.
prometheus.scrape "system" {
  targets         = discovery.relabel.system.output
  forward_to      = [prometheus.remote_write.default.receiver]
  scrape_interval = "15s"
}
// DOCKER METRICS
// Embedded cAdvisor for per-container metrics.
// NOTE(review): besides talking to the Docker socket, cAdvisor reads
// container metadata directly from disk under /var/lib/docker — the journal
// error above is exactly that: open
// /var/lib/docker/image/overlay2/layerdb/mounts/<id>/mount-id: permission
// denied. Membership in the `docker` group only grants access to the
// socket, not to /var/lib/docker, which is readable by root only. That is
// why discovery.docker/loki.source.docker (socket-only) work while this
// component fails. Running Alloy as root, or granting the alloy user read
// access to /var/lib/docker (e.g. via ACLs), appears to be required —
// confirm against deployment policy.
prometheus.exporter.cadvisor "docker" {
docker_host = "unix:///run/docker.sock"
// Export only containers, not every raw cgroup slice.
docker_only = true
// Window of in-memory sample history cAdvisor retains.
storage_duration = "5m"
}
// Decorate the cAdvisor targets the same way as the system targets:
// stable `instance` plus a per-host `job` name.
discovery.relabel "docker" {
  targets = prometheus.exporter.cadvisor.docker.targets

  // instance = this host's name.
  rule {
    replacement  = constants.hostname
    target_label = "instance"
  }

  // job = "<hostname>-docker".
  rule {
    replacement  = string.format("%s-docker", constants.hostname)
    target_label = "job"
  }
}
// Scrape the relabelled cAdvisor targets every 10s and push the samples to
// the remote_write receiver.
prometheus.scrape "docker" {
  targets         = discovery.relabel.docker.output
  forward_to      = [prometheus.remote_write.default.receiver]
  scrape_interval = "10s"
}
// DOCKER LOGS
// Discover running containers via the Docker API socket. This only needs
// socket access (granted by the docker group), which is why it works while
// the cAdvisor exporter does not.
discovery.docker "logs" {
host = "unix:///run/docker.sock"
}
// Rules-only component: `targets = []` because only the exported `rules`
// are consumed (by loki.source.docker via discovery.relabel.logs.rules).
discovery.relabel "logs" {
targets = []
// Strip the leading "/" from the Docker container name and expose it as
// the `service_name` label.
rule {
source_labels = ["__meta_docker_container_name"]
regex = "/(.*)"
target_label = "service_name"
}
}
// Tail the logs of every discovered container and ship them to Loki.
loki.source.docker "default" {
host = "unix:///run/docker.sock"
targets = discovery.docker.logs.targets
// Static label attached to every log entry.
labels = {"platform" = "docker"}
// Apply the service_name relabel rules defined in discovery.relabel "logs".
relabel_rules = discovery.relabel.logs.rules
forward_to = [loki.write.default.receiver]
}
I would like to understand what’s going on.