I’m trying to collect container metrics with prometheus.exporter.cadvisor, but I’m running into permission issues I’d like to understand. Specifically, this journal entry:
Jul 28 13:00:18 relayplan-core-prd alloy[107752]: ts=2025-07-28T13:00:18.936333402Z level=error msg="Failed to create existing container: /system.slice/docker-a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd.scope: failed to identify the read-write layer ID for container \"a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd\". - open /var/lib/docker/image/overlay2/layerdb/mounts/a611223ca8ee48593472e85a8488614f75761dda48e4489d15d73fa5d9cd8bbd/mount-id: permission denied" component_path=/ component_id=prometheus.exporter.cadvisor.docker func=Errorf
Alloy is installed directly on the Ubuntu host from the official repos and uses the provided alloy.service unit file. It runs as the alloy user, so I tried adding that user to the docker group, but the error remains.
That said, there is no problem scraping logs for Loki with discovery.docker and scraping system metrics with prometheus.exporter.unix. Here’s my full config:
// Global Alloy logging: informational messages and above.
logging {
  level = "info"
}
// TARGETS
// Remote-write sink for Prometheus; every metrics pipeline forwards here.
prometheus.remote_write "default" {
  endpoint {
    url = "http://10.0.0.5:9090/api/v1/write"
  }
}
// Push sink for Loki; every log pipeline forwards here.
// NOTE: the previous `external_labels = {}` was dropped — an empty map is
// already the default, so the attribute was a no-op.
loki.write "default" {
  endpoint {
    url = "http://10.0.0.5:3100/loki/api/v1/push"
  }
}
// SYSTEM METRICS
// Host-level metrics (node_exporter collectors embedded in Alloy).
prometheus.exporter.unix "system" {
  // Drop collectors for subsystems not present on this host; keep meminfo on.
  disable_collectors = ["ipvs", "btrfs", "infiniband", "xfs", "zfs"]
  enable_collectors  = ["meminfo"]

  filesystem {
    // Skip pseudo/virtual filesystems and container-managed mounts.
    fs_types_exclude     = "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$"
    mount_points_exclude = "^/(dev|proc|run/credentials/.+|sys|var/lib/docker/.+)($|/)"
    mount_timeout        = "5s"
  }

  netclass {
    // Ignore virtual interfaces: veth pairs, Calico devices, 15-hex-char names.
    ignored_devices = "^(veth.*|cali.*|[a-f0-9]{15})$"
  }

  netdev {
    // Same exclusion as netclass, applied to per-device traffic metrics.
    device_exclude = "^(veth.*|cali.*|[a-f0-9]{15})$"
  }
}
// Stamp host-level targets with stable instance/job labels.
discovery.relabel "system" {
  targets = prometheus.exporter.unix.system.targets

  // Use the host name as the instance label instead of host:port.
  rule {
    target_label = "instance"
    replacement  = constants.hostname
  }

  // Job label of the form "<hostname>-metrics".
  rule {
    target_label = "job"
    replacement  = string.format("%s-metrics", constants.hostname)
  }
}
// Scrape the relabeled host targets every 15s and remote-write the samples.
prometheus.scrape "system" {
  targets         = discovery.relabel.system.output
  forward_to      = [prometheus.remote_write.default.receiver]
  scrape_interval = "15s"
}
// DOCKER METRICS
// Container metrics via the embedded cAdvisor exporter.
//
// NOTE(review): unlike discovery.docker / loki.source.docker below, cAdvisor
// does not only talk to the Docker API socket — the journal error shows it
// opening /var/lib/docker/image/overlay2/layerdb/.../mount-id directly from
// the filesystem. Membership in the `docker` group grants access to the
// socket only, not to /var/lib/docker (root-only by default), which is
// presumably why the "permission denied" persists. Running Alloy as root, or
// granting the alloy user read access to /var/lib/docker (e.g. via ACLs),
// looks like the actual fix — confirm against the deployment's security
// requirements.
prometheus.exporter.cadvisor "docker" {
docker_host = "unix:///run/docker.sock"
docker_only = true
storage_duration = "5m"
}
// Stamp the cAdvisor targets with stable instance/job labels,
// mirroring the "system" relabel component above.
discovery.relabel "docker" {
  targets = prometheus.exporter.cadvisor.docker.targets

  // Use the host name as the instance label instead of host:port.
  rule {
    target_label = "instance"
    replacement  = constants.hostname
  }

  // Job label of the form "<hostname>-docker".
  rule {
    target_label = "job"
    replacement  = string.format("%s-docker", constants.hostname)
  }
}
// Scrape the relabeled container targets every 10s and remote-write them.
prometheus.scrape "docker" {
  targets         = discovery.relabel.docker.output
  forward_to      = [prometheus.remote_write.default.receiver]
  scrape_interval = "10s"
}
// DOCKER LOGS
// Enumerate running containers over the Docker API socket
// (socket access only — this is why the docker group suffices here).
discovery.docker "logs" {
  host = "unix:///run/docker.sock"
}
// Rule holder consumed via `.rules` by loki.source.docker below;
// the empty targets list is deliberate — only the rules are exported.
discovery.relabel "logs" {
  targets = []

  // Strip the leading "/" from the container name, e.g. "/web" -> "web".
  rule {
    source_labels = ["__meta_docker_container_name"]
    regex         = "/(.*)"
    target_label  = "service_name"
  }
}
// Tail logs from the discovered containers and push them to Loki.
loki.source.docker "default" {
  host          = "unix:///run/docker.sock"
  targets       = discovery.docker.logs.targets
  labels        = {"platform" = "docker"}
  relabel_rules = discovery.relabel.logs.rules
  forward_to    = [loki.write.default.receiver]
}
I would like to understand what’s going on.