Windows exporter component crashes when scraping scheduled tasks

Hello everyone,

I’m trying to use Grafana Alloy to generate metrics for my Windows VMs.
So far I’ve got most of my Alloy config working.
My only issue, which is pretty much a deal-breaker, is that when I enable the scheduled_task collector of the prometheus.exporter.windows component, Alloy starts crashing after a few minutes…
Metrics are sent for some time, even the scheduled task ones, but at some point Alloy just crashes with a huge stack trace several thousand lines long.

So far I have only been able to test this on Windows 11 23H2 and Windows Server 2019 Datacenter, and both show this issue.

Here is an extract of the stack trace that I think can help:

Exception 0xc0000005 0x0 0x7ff9a689f2b0 0x7ff9b1c76511
PC=0x7ff9b1c76511
signal arrived during external code execution

runtime.cgocall(0x84d340, 0xc006b1ec08)
        /usr/local/go/src/runtime/cgocall.go:157 +0x3e fp=0xc003a411d0 sp=0xc003a41198 pc=0x7d92de
syscall.SyscallN(0xc006b1e808?, {0xc003a41268?, 0x0?, 0x40c?})
        /usr/local/go/src/runtime/syscall_windows.go:544 +0x107 fp=0xc003a41248 sp=0xc003a411d0 pc=0x848527
syscall.Syscall6(0x63e2180?, 0x0?, 0x0?, 0x0?, 0x84855a?, 0x84d340?, 0xc006b1ec08?, 0x0?)
        /usr/local/go/src/runtime/syscall_windows.go:488 +0x4a fp=0xc003a412a8 sp=0xc003a41248 pc=0x8481ca
github.com/go-ole/go-ole.getIDsOfName(0x12f6c3759a0, {0xc003a41368, 0x1, 0x40c?})
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch_windows.go:20 +0x167 fp=0xc003a41348 sp=0xc003a412a8 pc=0x36a43a7
github.com/go-ole/go-ole.(*IDispatch).GetIDsOfName(...)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:22
github.com/go-ole/go-ole.(*IDispatch).GetSingleIDOfName(0x93c2188?, {0x8126eef?, 0xc003a41438?})
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:47 +0x35 fp=0xc003a41388 sp=0xc003a41348 pc=0x36a3f75
github.com/go-ole/go-ole.(*IDispatch).InvokeWithOptionalArgs(0x12f6c3759a0, {0x8126eef?, 0x0?}, 0x2, {0x0, 0x0, 0x0})
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:63 +0x35 fp=0xc003a413c0 sp=0xc003a41388 pc=0x36a4015
github.com/go-ole/go-ole.(*IDispatch).GetProperty(...)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:88
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c3776f0?, 0xc003a41530)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:106 +0x58 fp=0xc003a414c0 sp=0xc003a413c0 pc=0x36a9c18
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksInFolder(0xc003a415b8?, 0xc003a41be8)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:274 +0xee fp=0xc003a41558 sp=0xc003a414c0 pc=0x5b6ad6e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c3776f0, 0xc003a41be8)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:292 +0x45 fp=0xc003a415f0 sp=0xc003a41558 pc=0x5b6b185
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41640 sp=0xc003a415f0 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c378070?, 0xc003a417b0)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41740 sp=0xc003a41640 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c378070, 0xc003a41be8)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a417d8 sp=0xc003a41740 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41828 sp=0xc003a417d8 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c377bf0?, 0xc003a41998)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41928 sp=0xc003a41828 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c377bf0, 0xc003a41be8)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a419c0 sp=0xc003a41928 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41a10 sp=0xc003a419c0 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c377ef0?, 0xc003a41b80)
        /go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41b10 sp=0xc003a41a10 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c377ef0, 0xc003a41be8)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a41ba8 sp=0xc003a41b10 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.getScheduledTasks()
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:260 +0x211 fp=0xc003a41c80 sp=0xc003a41ba8 pc=0x5b6aa11
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.(*collector).collect(0xc003478870, 0xc0056896e0)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:178 +0x32 fp=0xc003a41db8 sp=0xc003a41c80 pc=0x5b6a372
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.(*collector).Collect(0xc003478870, 0xe?, 0x4?)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:167 +0x25 fp=0xc003a41e38 sp=0xc003a41db8 pc=0x5b6a1a5
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).execute(0xc003036d80, {0x8146621, 0xe}, {0x947f840, 0xc003478870}, 0xc00559ad18, 0xc0056896e0)
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:176 +0x8f fp=0xc003a41f28 sp=0xc003a41e38 pc=0x5b9136f
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect.func2({0x8146621, 0xe}, {0x947f840?, 0xc003478870?})
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:117 +0xa5 fp=0xc003a41fb0 sp=0xc003a41f28 pc=0x5b91085
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect.gowrap1()
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:123 +0x30 fp=0xc003a41fe0 sp=0xc003a41fb0 pc=0x5b90fb0
runtime.goexit({})
        /usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc003a41fe8 sp=0xc003a41fe0 pc=0x84bb81
created by github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect in goroutine 6781
        /go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:115 +0x470

And here is my full config:

config.alloy

// Load the custom components declared in filters.alloy; they become
// available under the "filters" namespace.
import.file "filters" {
  filename = "./filters.alloy"
}

// Instantiate the imported "config" component. Its exports are read below as
// filters.config.default.<export name>.
filters.config "default" { }

// Embedded Windows exporter; only the collectors listed here are enabled.
prometheus.exporter.windows "host" {
  enabled_collectors = [
    "cpu",
    "cs",
    "logical_disk",
    "net",
    "os",
    "scheduled_task",
    "service",
    "system",
  ]
}

// Scrape the Windows exporter and fan every sample out to two relabel
// pipelines: "services" keeps only the service metrics, "drop_services"
// keeps everything else.
prometheus.scrape "host" {
  forward_to = [
    prometheus.relabel.services.receiver,
    prometheus.relabel.drop_services.receiver,
  ]
  targets    = prometheus.exporter.windows.host.targets
}

// Service metrics pipeline: keep only windows_service_* series belonging to
// the services listed in filters.config.default.services, mark them with
// alert="critical", then hand them to the common "tag" stage.
prometheus.relabel "services" {
  forward_to = [prometheus.relabel.tag.receiver]

  // Keep only metrics produced by the service collector.
  rule {
    action        = "keep"
    source_labels = ["__name__"]
    regex         = "windows_service_.+"
  }

  // Keep only the services selected in filters.alloy (patterns are OR-ed).
  rule {
    action        = "keep"
    source_labels = ["name"]
    regex         = join(filters.config.default.services, "|")
  }

  // Tag every series that survived the filters above.
  // (A second, identical "keep on name" rule used to follow this one; it
  // could never drop anything the rule above had not already dropped.)
  rule {
    action       = "replace"
    target_label = "alert"
    replacement  = "critical"
  }
}

// Counterpart of the "services" pipeline: lets every metric through except
// the windows_service_* series, which the "services" pipeline handles.
prometheus.relabel "drop_services" {
  forward_to = [prometheus.relabel.tag.receiver]

  rule {
    source_labels = ["__name__"]
    regex         = "windows_service_.+"
    action        = "drop"
  }
}


// MSSQL exporter instance with a custom collector (YAML in query_config)
// exposing one gauge per SQL Agent job, labelled by JobName:
//   1   = last run succeeded
//   0   = last run failed or ended with any other status
//   999 = the job has never run
prometheus.exporter.mssql "mssql_jobs" {
  connection_string = "sqlserver://@localhost:1433?authenticator=winsspi"
  query_config = `
collector_name: mssql_standard
metrics:
  - metric_name: mssql_jobs_success
    type: gauge
    help: 'Status of the last MSSQL job'
    key_labels:
      - JobName
    values: [Last_run_Status]
    query: |
      SELECT
          j.name AS JobName,
          CASE
              WHEN h.run_status = 1 THEN 1        -- Success
              WHEN h.run_status IS NULL THEN 999  -- Job has never run
              ELSE 0                              -- Failure or other status
          END AS Last_run_Status
      FROM
          msdb.dbo.sysjobs j
      LEFT JOIN
          (
              -- Rank each job's outcome rows (step_id = 0), newest first.
              -- Ordering by run_date, then run_time, avoids adding the two
              -- integers together, which does not yield a sortable timestamp.
              SELECT
                  job_id,
                  run_status,
                  ROW_NUMBER() OVER (
                      PARTITION BY job_id
                      ORDER BY run_date DESC, run_time DESC, instance_id DESC
                  ) AS rn
              FROM
                  msdb.dbo.sysjobhistory
              WHERE
                  step_id = 0
          ) AS h ON h.job_id = j.job_id AND h.rn = 1  -- LEFT JOIN keeps never-run jobs
      ORDER BY
          j.name;
  `
}

// Second MSSQL exporter instance against the same server, with no
// query_config set.
// NOTE(review): presumably this one serves the exporter's built-in
// collectors — confirm against the component documentation.
prometheus.exporter.mssql "mssql" {
  connection_string = "sqlserver://@localhost:1433?authenticator=winsspi"
}

// Scrape the SQL Agent job collector every 10 minutes and send the samples
// through the "sql_jobs" filter.
prometheus.scrape "mssql_jobs" {
  scrape_interval = "10m"
  targets         = prometheus.exporter.mssql.mssql_jobs.targets
  forward_to      = [prometheus.relabel.sql_jobs.receiver]
}

// Scrape the plain MSSQL exporter and send the samples straight to the
// common "tag" stage.
prometheus.scrape "mssql_host" {
  forward_to = [prometheus.relabel.tag.receiver]
  targets    = prometheus.exporter.mssql.mssql.targets
}

// SQL Agent job pipeline: keep only the jobs whose JobName matches one of
// the patterns in filters.config.default.sql_jobs and mark them critical.
prometheus.relabel "sql_jobs" {
  forward_to = [prometheus.relabel.tag.receiver]

  // Patterns from filters.alloy are OR-ed into a single regex.
  rule {
    source_labels = ["JobName"]
    regex         = join(filters.config.default.sql_jobs, "|")
    action        = "keep"
  }

  // Tag every series that passed the filter above.
  rule {
    target_label = "alert"
    replacement  = "critical"
    action       = "replace"
  }
}

// Final stage shared by every metrics pipeline in this file: force the job
// label to "integration/windows" and hand the samples to remote_write.
prometheus.relabel "tag" {
  forward_to = [prometheus.remote_write.prometheus.receiver]

  rule {
    target_label = "job"
    replacement  = "integration/windows"
    action       = "replace"
  }
}


// Metrics sink: push every sample to the remote-write endpoint below.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "https://mimir.example.com/api/v1/metrics/write"
  }
}

// Logs sink: push every log entry to the Loki endpoint below.
loki.write "loki" {
  endpoint {
    url = "https://loki.example.com/loki/api/v1/push"
  }
}

// Windows Event Log sources. One component per log channel (Application,
// Security, Setup, System); all of them feed the same relabel stage.
loki.source.windowsevent "events_application" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Application"
}

loki.source.windowsevent "events_security" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Security"
}

loki.source.windowsevent "events_setup" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Setup"
}

loki.source.windowsevent "events_system" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "System"
}

// Give every event log entry the same job label used on the metrics side
// ("integration/windows"), then pass it on to the processing stage.
loki.relabel "events" {
  forward_to = [loki.process.events.receiver]

  rule {
    target_label = "job"
    replacement  = "integration/windows"
    action       = "replace"
  }
}

// Log processing pipeline: pull "level" and "source" out of the JSON log
// line, promote both to labels, then ship the entries to Loki.
loki.process "events" {
  forward_to = [loki.write.loki.receiver]

  // NOTE(review): an empty expression presumably means "use the key name
  // itself as the JSON path" — confirm against the stage.json documentation.
  stage.json {
    expressions = {
      level  = "",
      source = "",
    }
  }

  // NOTE(review): an empty value presumably means "take the label from the
  // extracted value of the same name" — confirm against stage.labels docs.
  stage.labels {
    values = {
      level  = "",
      source = "",
    }
  }
}

filters.alloy

// Custom component holding the filter lists used by config.alloy. Each
// export is a list of regex fragments.
declare "config" {
  // Regexes matching the Windows services to keep (service names are lowercase).
  export "services" {
    value = ["alloy"]
  }

  // Regexes matching scheduled task names.
  // NOTE(review): this export is not referenced anywhere in config.alloy as shown.
  export "tasks" {
    value = ["/Microsoft/.+"]
  }

  // Regexes matching the SQL Agent jobs to keep.
  export "sql_jobs" {
    value = [
      "Integration-.+",
      "Integration_.+",
    ]
  }
}
1 Like