We upgraded our Loki-stack from version 2.1.0 to version 2.4.1. After the upgrade, Grafana alert rules are not working. Previously we were running Grafana version 6.7, and now we are running version 8.0.
In the dashboard I am getting the error below.
Checking the logs on the Grafana pod, I am getting the errors below.
t=2022-05-11T13:16:12+0000 lvl=info msg="Request Completed" logger=context userId=1 orgId=1 uname=user1 method=GET path=/api/prometheus/grafana/api/v1/rules status=404 remote_addr=159.12.0.4 time_ms=2 size=29 referer="https://digisc.dev.company.com/internal/grafana/d/k8s_views_global/kubernetes-views-global-version-8-grafana?orgId=1&refresh=30s"
t=2022-05-11T13:30:57+0000 lvl=info msg="Request Completed" logger=context userId=1 orgId=1 uname=user1 method=GET path=/api/alertmanager/grafana/config/api/v1/alerts status=404 remote_addr=159.12.0.4 time_ms=9 size=29 referer=https://digisc.dev.campany.com/internal/grafana/alerting/notifications
In the browser console I am getting the error below.
https://digisc.dev.company.com/internal/grafana/api/ruler/1/api/v1/rules 500
We used Helm to release our infrastructure changes.
Below is one of the alerts we used:
{
"alert": {
"alertRuleTags": {},
"conditions": [
{
"evaluator": {
"params": [
15
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "1m",
"handler": 1,
"name": "Cluster CPU Capacity alert",
"noDataState": "ok",
"notifications": []
},
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 5,
"w": 6,
"x": 6,
"y": 9
},
"hiddenSeries": false,
"id": 10,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(kube_node_status_capacity_cpu_cores)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "capacity",
"refId": "A"
},
{
"expr": "sum(kube_node_status_allocatable_cpu_cores)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "allocatable",
"refId": "B"
},
{
"expr": "sum(kube_pod_container_resource_requests_cpu_cores)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "requested",
"refId": "C"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 15
}
],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Cluster CPU Capacity",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "none",
"label": "cores",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
I am not well versed in this topic. Maybe we need to update our JSON dashboards? Could you please share a template for the rules, or advise on a solution?
Thanks