Hello everyone,
I’m trying to use Grafana Alloy to generate metrics for my Windows VMs.
So far I’ve got most of my alloy config working.
My only issue, which is pretty much a deal breaker, is that when I enable the scheduled_task collector of the prometheus.exporter.windows
component, Alloy starts to crash after a few minutes…
Metrics get sent for some time, even the scheduled task ones, but at some point it just crashes with a huge stack trace that is a few thousand lines long.
So far I have only been able to test this on Windows 11 23H2 and Windows Server 2019 Datacenter, but both show this issue.
Here is an extract of the stack trace that I think can help:
Exception 0xc0000005 0x0 0x7ff9a689f2b0 0x7ff9b1c76511
PC=0x7ff9b1c76511
signal arrived during external code execution
runtime.cgocall(0x84d340, 0xc006b1ec08)
/usr/local/go/src/runtime/cgocall.go:157 +0x3e fp=0xc003a411d0 sp=0xc003a41198 pc=0x7d92de
syscall.SyscallN(0xc006b1e808?, {0xc003a41268?, 0x0?, 0x40c?})
/usr/local/go/src/runtime/syscall_windows.go:544 +0x107 fp=0xc003a41248 sp=0xc003a411d0 pc=0x848527
syscall.Syscall6(0x63e2180?, 0x0?, 0x0?, 0x0?, 0x84855a?, 0x84d340?, 0xc006b1ec08?, 0x0?)
/usr/local/go/src/runtime/syscall_windows.go:488 +0x4a fp=0xc003a412a8 sp=0xc003a41248 pc=0x8481ca
github.com/go-ole/go-ole.getIDsOfName(0x12f6c3759a0, {0xc003a41368, 0x1, 0x40c?})
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch_windows.go:20 +0x167 fp=0xc003a41348 sp=0xc003a412a8 pc=0x36a43a7
github.com/go-ole/go-ole.(*IDispatch).GetIDsOfName(...)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:22
github.com/go-ole/go-ole.(*IDispatch).GetSingleIDOfName(0x93c2188?, {0x8126eef?, 0xc003a41438?})
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:47 +0x35 fp=0xc003a41388 sp=0xc003a41348 pc=0x36a3f75
github.com/go-ole/go-ole.(*IDispatch).InvokeWithOptionalArgs(0x12f6c3759a0, {0x8126eef?, 0x0?}, 0x2, {0x0, 0x0, 0x0})
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:63 +0x35 fp=0xc003a413c0 sp=0xc003a41388 pc=0x36a4015
github.com/go-ole/go-ole.(*IDispatch).GetProperty(...)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/idispatch.go:88
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c3776f0?, 0xc003a41530)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:106 +0x58 fp=0xc003a414c0 sp=0xc003a413c0 pc=0x36a9c18
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksInFolder(0xc003a415b8?, 0xc003a41be8)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:274 +0xee fp=0xc003a41558 sp=0xc003a414c0 pc=0x5b6ad6e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c3776f0, 0xc003a41be8)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:292 +0x45 fp=0xc003a415f0 sp=0xc003a41558 pc=0x5b6b185
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41640 sp=0xc003a415f0 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c378070?, 0xc003a417b0)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41740 sp=0xc003a41640 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c378070, 0xc003a41be8)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a417d8 sp=0xc003a41740 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41828 sp=0xc003a417d8 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c377bf0?, 0xc003a41998)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41928 sp=0xc003a41828 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c377bf0, 0xc003a41be8)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a419c0 sp=0xc003a41928 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively.func1(0x9?)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:307 +0x66 fp=0xc003a41a10 sp=0xc003a419c0 pc=0x5b6b346
github.com/go-ole/go-ole/oleutil.ForEach(0x12f6c377ef0?, 0xc003a41b80)
/go/pkg/mod/github.com/go-ole/go-ole@v1.3.0/oleutil/oleutil.go:122 +0x296 fp=0xc003a41b10 sp=0xc003a41a10 pc=0x36a9e56
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.fetchTasksRecursively(0x12f6c377ef0, 0xc003a41be8)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:304 +0x10e fp=0xc003a41ba8 sp=0xc003a41b10 pc=0x5b6b24e
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.getScheduledTasks()
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:260 +0x211 fp=0xc003a41c80 sp=0xc003a41ba8 pc=0x5b6aa11
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.(*collector).collect(0xc003478870, 0xc0056896e0)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:178 +0x32 fp=0xc003a41db8 sp=0xc003a41c80 pc=0x5b6a372
github.com/prometheus-community/windows_exporter/pkg/collector/scheduled_task.(*collector).Collect(0xc003478870, 0xe?, 0x4?)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/scheduled_task/scheduled_task.go:167 +0x25 fp=0xc003a41e38 sp=0xc003a41db8 pc=0x5b6a1a5
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).execute(0xc003036d80, {0x8146621, 0xe}, {0x947f840, 0xc003478870}, 0xc00559ad18, 0xc0056896e0)
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:176 +0x8f fp=0xc003a41f28 sp=0xc003a41e38 pc=0x5b9136f
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect.func2({0x8146621, 0xe}, {0x947f840?, 0xc003478870?})
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:117 +0xa5 fp=0xc003a41fb0 sp=0xc003a41f28 pc=0x5b91085
github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect.gowrap1()
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:123 +0x30 fp=0xc003a41fe0 sp=0xc003a41fb0 pc=0x5b90fb0
runtime.goexit({})
/usr/local/go/src/runtime/asm_amd64.s:1695 +0x1 fp=0xc003a41fe8 sp=0xc003a41fe0 pc=0x84bb81
created by github.com/prometheus-community/windows_exporter/pkg/collector.(*Prometheus).Collect in goroutine 6781
/go/pkg/mod/github.com/prometheus-community/windows_exporter@v0.25.2-0.20240425065806-f6b91e5cb0f7/pkg/collector/prometheus.go:115 +0x470
And here is my full config:
config.alloy
// Import the custom components declared in ./filters.alloy under the
// `filters` namespace.
import.file "filters" {
  filename = "./filters.alloy"
}

// Instantiate the imported `config` component with no arguments; other
// components in this file read its exports as filters.config.default.<name>.
filters.config "default" {}
// Windows host metrics exporter, limited to the collectors listed below.
prometheus.exporter.windows "host" {
  enabled_collectors = [
    "cpu",
    "service",
    "cs",
    "logical_disk",
    "net",
    "os",
    "system",
    // NOTE(review): the crash in the accompanying stack trace is raised from
    // the windows_exporter scheduled_task collector.
    "scheduled_task",
  ]
}
// Scrape the Windows exporter and fan the samples out to two relabel
// pipelines: one that handles windows_service_* series and one that handles
// everything else.
prometheus.scrape "host" {
  forward_to = [
    prometheus.relabel.services.receiver,
    prometheus.relabel.drop_services.receiver,
  ]
  targets = prometheus.exporter.windows.host.targets
}
// Service-metric pipeline: keeps only windows_service_* series whose `name`
// label matches one of the configured services, and tags them as critical.
prometheus.relabel "services" {
  forward_to = [prometheus.relabel.tag.receiver]

  // 1. Keep only the Windows service metrics.
  rule {
    action        = "keep"
    source_labels = ["__name__"]
    regex         = "windows_service_.+"
  }

  // 2. Keep only the services listed in filters.config.default.services
  //    (the list entries are joined into a single alternation).
  rule {
    action        = "keep"
    source_labels = ["name"]
    regex         = join(filters.config.default.services, "|")
  }

  // 3. Add alert="critical" to every series that survived the filters.
  //    The identical `keep` rule that used to follow this one was removed:
  //    it re-tested the unchanged `name` label and could never drop anything.
  rule {
    action       = "replace"
    target_label = "alert"
    replacement  = "critical"
  }
}
// Counterpart of the "services" pipeline: forwards every series except the
// windows_service_* ones.
prometheus.relabel "drop_services" {
  forward_to = [prometheus.relabel.tag.receiver]

  rule {
    source_labels = ["__name__"]
    regex         = "windows_service_.+"
    action        = "drop"
  }
}
// MSSQL exporter with a custom collector that reports, per SQL Agent job, the
// outcome of its most recent run:
//   1   = last run succeeded
//   0   = last run failed or ended with any other status
//   999 = the job has no job-outcome history row (never ran)
//
// Fixes compared to the previous query:
//   * The latest run is chosen by ordering on run_date, run_time (and
//     instance_id as a tie-breaker) instead of MAX(run_date + run_time).
//     Both columns are integers (YYYYMMDD / HHMMSS), so their sum does not
//     sort chronologically.
//   * The history is attached with a single LEFT JOIN, so jobs without history
//     are kept and the 999 branch is actually reachable.
//   * Only job-outcome rows (step_id = 0) are considered, so a step row that
//     shares the same timestamp can no longer produce a second, conflicting
//     row for the same JobName.
prometheus.exporter.mssql "mssql_jobs" {
  connection_string = "sqlserver://@localhost:1433?authenticator=winsspi"
  query_config      = `
collector_name: mssql_standard
metrics:
  - metric_name: mssql_jobs_success
    type: gauge
    help: 'Status of the last MSSQL job'
    key_labels:
      - JobName
    values: [Last_run_Status]
    query: |
      SELECT
        j.name AS JobName,
        CASE
          WHEN h.run_status = 1 THEN 1        -- Success
          WHEN h.run_status IS NULL THEN 999  -- The job has never run
          ELSE 0                              -- Failure or other status
        END AS Last_run_Status
      FROM
        msdb.dbo.sysjobs j
      LEFT JOIN
        (
          SELECT
            job_id,
            run_status,
            ROW_NUMBER() OVER (
              PARTITION BY job_id
              ORDER BY run_date DESC, run_time DESC, instance_id DESC
            ) AS rn
          FROM
            msdb.dbo.sysjobhistory
          WHERE
            step_id = 0
        ) AS h ON h.job_id = j.job_id AND h.rn = 1
      ORDER BY
        j.name;
`
}
// Second MSSQL exporter instance against the same local server; no
// query_config is set here, unlike the "mssql_jobs" instance.
prometheus.exporter.mssql "mssql" {
  connection_string = "sqlserver://@localhost:1433?authenticator=winsspi"
}
// Scrape the SQL Agent job-status exporter every 10 minutes and hand the
// samples to the job-name filter.
prometheus.scrape "mssql_jobs" {
  scrape_interval = "10m"
  targets         = prometheus.exporter.mssql.mssql_jobs.targets
  forward_to      = [prometheus.relabel.sql_jobs.receiver]
}
// Scrape the "mssql" exporter instance and send its samples straight to the
// common tagging stage.
prometheus.scrape "mssql_host" {
  forward_to = [prometheus.relabel.tag.receiver]
  targets    = prometheus.exporter.mssql.mssql.targets
}
// SQL Agent job pipeline: keeps only the jobs whose JobName matches one of
// the patterns in filters.config.default.sql_jobs and tags them as critical.
prometheus.relabel "sql_jobs" {
  forward_to = [prometheus.relabel.tag.receiver]

  // Filter on the job name; the patterns are joined into one alternation.
  rule {
    source_labels = ["JobName"]
    regex         = join(filters.config.default.sql_jobs, "|")
    action        = "keep"
  }

  // Add alert="critical" to every remaining series.
  rule {
    target_label = "alert"
    replacement  = "critical"
    action       = "replace"
  }
}
// Final stage shared by every metrics pipeline in this file: overwrite the
// `job` label, then ship the samples to remote write.
prometheus.relabel "tag" {
  forward_to = [prometheus.remote_write.prometheus.receiver]

  rule {
    target_label = "job"
    replacement  = "integration/windows"
    action       = "replace"
  }
}
// Remote-write sink that receives everything forwarded by the "tag" stage.
prometheus.remote_write "prometheus" {
  endpoint {
    url = "https://mimir.example.com/api/v1/metrics/write"
  }
}
// Log sink that receives the processed Windows event log entries.
loki.write "loki" {
  endpoint {
    url = "https://loki.example.com/loki/api/v1/push"
  }
}
// Read the "Application" event log and pass entries to the shared relabel stage.
loki.source.windowsevent "events_application" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Application"
}
// Read the "Security" event log and pass entries to the shared relabel stage.
loki.source.windowsevent "events_security" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Security"
}
// Read the "Setup" event log and pass entries to the shared relabel stage.
loki.source.windowsevent "events_setup" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "Setup"
}
// Read the "System" event log and pass entries to the shared relabel stage.
loki.source.windowsevent "events_system" {
  forward_to    = [loki.relabel.events.receiver]
  eventlog_name = "System"
}
// Shared entry point for all four event log sources: set the `job` label and
// forward to the processing stage.
loki.relabel "events" {
  forward_to = [loki.process.events.receiver]

  rule {
    target_label = "job"
    replacement  = "integration/windows"
    action       = "replace"
  }
}
// Turn the `level` and `source` fields of each event into labels, then send
// the entry to Loki.
loki.process "events" {
  forward_to = [loki.write.loki.receiver]

  // Pull `level` and `source` out of the JSON log line.
  // NOTE(review): the empty expressions presumably mean "use the key name as
  // the lookup path" - confirm against the stage.json documentation.
  stage.json {
    expressions = {
      level  = "",
      source = "",
    }
  }

  // Attach the two extracted values as labels.
  stage.labels {
    values = {
      level  = "",
      source = "",
    }
  }
}
filters.alloy
// Custom component holding the filter lists used by the main configuration.
// Each export is a list of regex fragments.
declare "config" {

  // Service names to keep (regex, service name in lowercase).
  export "services" {
    value = [
      "alloy",
    ]
  }

  // Scheduled task names to match (regex).
  // NOTE(review): no component in the accompanying config.alloy appears to
  // reference this export - confirm whether it is still needed.
  export "tasks" {
    value = [
      "/Microsoft/.+",
    ]
  }

  // SQL Agent job names to keep (regex).
  export "sql_jobs" {
    value = [
      "Integration-.+",
      "Integration_.+",
    ]
  }
}