Hello,
As the topic title says: I provisioned alerts via a ConfigMap using the following file:
# Grafana alerting provisioning file (delivered via ConfigMap).
# Declares one Slack contact point and one rule group with three
# CloudWatch/ElastiCache alert rules.
apiVersion: 1
contactPoints:
  - orgId: 1
    name: slack-alerts
    receivers:
      - uid: aef3y124nw9oga
        type: slack
        settings:
          # Placeholder as posted; presumably the real Slack webhook URL.
          url: hook
        disableResolveMessage: false
groups:
  - orgId: 1
    name: ElasticCacheAlerts
    folder: AWSCloudWatch
    interval: 1m
    rules:
      # Rule 1: A (CloudWatch CPUUtilization) -> B (reduce: last)
      #         -> C (threshold: gt 4), C is the alert condition.
      - uid: aef3y24nw9oga
        title: ElasticCache - CPUUtilization
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PB90C5F94D9B4D2E1
            model:
              datasource:
                type: cloudwatch
                uid: PB90C5F94D9B4D2E1
              dimensions:
                CacheClusterId: '*'
              expression: ""
              id: ""
              intervalMs: 1000
              label: ""
              logGroups: []
              matchExact: true
              maxDataPoints: 43200
              metricEditorMode: 0
              metricName: CPUUtilization
              metricQueryType: 0
              namespace: AWS/ElastiCache
              period: ""
              queryMode: Metrics
              refId: A
              region: default
              sqlExpression: ""
              statistic: Average
          - refId: B
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params: []
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - B
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: last
              refId: B
              type: reduce
          - refId: C
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 4
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - C
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: B
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 1m
        annotations:
          summary: CPUUtilization for Redis cluster
        isPaused: false
        notification_settings:
          receiver: slack-alerts
      # Rule 2: A (CloudWatch FreeableMemory) -> B (reduce: last)
      #         -> D (math: B / 1024**3) -> C (threshold: lt 0.5).
      - uid: cef42xlbngxdsd
        title: ElasticCache - FreeableMemory
        # The condition must point at the threshold node (C). The posted
        # file used D, which is the math node feeding the threshold, so
        # the "lt 0.5" check in C was never the deciding expression.
        condition: C
        data:
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PB90C5F94D9B4D2E1
            model:
              datasource:
                type: cloudwatch
                uid: PB90C5F94D9B4D2E1
              dimensions:
                CacheClusterId: '*'
              expression: ""
              hide: false
              id: ""
              intervalMs: 1000
              label: ""
              logGroups: []
              matchExact: true
              maxDataPoints: 43200
              metricEditorMode: 0
              metricName: FreeableMemory
              metricQueryType: 0
              namespace: AWS/ElastiCache
              period: ""
              queryMode: Metrics
              refId: A
              region: default
              sqlExpression: ""
              statistic: Average
          - refId: B
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params: []
                    type: gt
                  operator:
                    type: and
                  query:
                    params:
                      - B
                  reducer:
                    params: []
                    type: last
                  type: query
              datasource:
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: last
              refId: B
              settings:
                mode: dropNN
              type: reduce
          - refId: D
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              # Presumably converts bytes to GiB; confirm the metric unit.
              expression: |
                ${B}/1024**3
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              refId: D
              type: math
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0.5
                      - 0
                    type: lt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: D
              hide: false
              intervalMs: 1000
              maxDataPoints: 43200
              refId: C
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 1m
        isPaused: false
        notification_settings:
          receiver: slack-alerts
      # Rule 3: A (CacheHits) -> C (reduce: sum), B (CacheMisses) -> D
      #         (reduce: sum) -> E (math: C / (C + D) * 100)
      #         -> F (reduce: last) -> G (threshold: lt 90).
      - uid: aef474us579c0d
        title: ElasticCache - HitMissRatio
        condition: G
        data:
          - refId: A
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PB90C5F94D9B4D2E1
            model:
              datasource:
                type: cloudwatch
                uid: PB90C5F94D9B4D2E1
              dimensions:
                CacheClusterId: '*'
              expression: ""
              id: ""
              intervalMs: 1000
              label: ""
              logGroups: []
              matchExact: true
              maxDataPoints: 43200
              metricEditorMode: 0
              metricName: CacheHits
              metricQueryType: 0
              namespace: AWS/ElastiCache
              period: ""
              queryMode: Metrics
              refId: A
              region: default
              sqlExpression: ""
              statistic: Average
          - refId: B
            relativeTimeRange:
              from: 600
              to: 0
            datasourceUid: PB90C5F94D9B4D2E1
            model:
              datasource:
                type: cloudwatch
                uid: PB90C5F94D9B4D2E1
              dimensions:
                CacheClusterId: '*'
              expression: ""
              id: ""
              intervalMs: 1000
              label: ""
              logGroups: []
              matchExact: true
              maxDataPoints: 43200
              metricEditorMode: 0
              metricName: CacheMisses
              metricQueryType: 0
              namespace: AWS/ElastiCache
              period: ""
              queryMode: Metrics
              refId: B
              region: default
              sqlExpression: ""
              statistic: Average
          - refId: C
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: A
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: sum
              refId: C
              settings:
                mode: ""
              type: reduce
          - refId: D
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: B
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: sum
              refId: D
              settings:
                mode: ""
              type: reduce
          - refId: E
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: (${C}/(${C}+${D}))*100
              intervalMs: 1000
              maxDataPoints: 43200
              refId: E
              type: math
          - refId: F
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 0
                      - 0
                    type: gt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: E
              intervalMs: 1000
              maxDataPoints: 43200
              reducer: last
              refId: F
              settings:
                mode: dropNN
              type: reduce
          - refId: G
            datasourceUid: __expr__
            model:
              conditions:
                - evaluator:
                    params:
                      - 90
                      - 0
                    type: lt
                  operator:
                    type: and
                  query:
                    params: []
                  reducer:
                    params: []
                    type: avg
                  type: query
              datasource:
                name: Expression
                type: __expr__
                uid: __expr__
              expression: F
              intervalMs: 1000
              maxDataPoints: 43200
              refId: G
              type: threshold
        noDataState: NoData
        execErrState: Error
        for: 1m
        annotations:
          summary: Hit / (Hit + Miss) * 100 ratio is below 90%
        isPaused: false
        notification_settings:
          receiver: slack-alerts
When I wanted to change the summary in the last rule (changing "above" to "below" — already done in the config shown), nothing happened. I then removed the whole file, but the alerts were still present in Grafana. So I replaced the file with this one:
# Provisioning file that asks Grafana to delete the three provisioned
# alert rules, each identified by organisation id and rule uid.
apiVersion: 1
deleteRules:
  - orgId: 1
    uid: aef3y24nw9oga
  - orgId: 1
    uid: cef42xlbngxdsd
  - orgId: 1
    uid: aef474us579c0d
and still nothing changed. Then I tried to delete the rules via the API, but I got the following logs:
logger=context userId=10 orgId=1 uname=sa-1-curl t=2025-03-10T14:50:48.827749335Z level=error msg="Request Completed" method=DELETE path=/api/v1/provisioning/alert-rules/aef3y24nw9oga status=500 remote_addr=10.3.0.228 time_ms=19 duration=19.989643ms size=35 referer= handler=/api/v1/provisioning/alert-rules/:UID status_source=server error="cannot delete with provided provenance 'api', needs 'file'"
logger=context userId=10 orgId=1 uname=sa-1-curl t=2025-03-10T14:52:34.20657324Z level=error msg="Request Completed" method=DELETE path=/api/v1/provisioning/alert-rules/aef3y24nw9oga status=500 remote_addr=10.3.0.228 time_ms=22 duration=22.06255ms size=35 referer= handler=/api/v1/provisioning/alert-rules/:UID status_source=server error="cannot delete with provided provenance '', needs 'file'"
logger=context userId=10 orgId=1 uname=sa-1-curl t=2025-03-10T14:55:35.547925799Z level=error msg="Request Completed" method=DELETE path=/api/v1/provisioning/alert-rules/aef474us579c0d status=500 remote_addr=10.3.0.228 time_ms=35 duration=35.361487ms size=35 referer= handler=/api/v1/provisioning/alert-rules/:UID status_source=server error="cannot delete with provided provenance '', needs 'file'"
logger=context userId=10 orgId=1 uname=sa-1-curl t=2025-03-10T14:55:46.907139189Z level=error msg="Request Completed" method=DELETE path=/api/v1/provisioning/alert-rules/aef474us579c0d status=500 remote_addr=10.3.0.228 time_ms=21 duration=21.264067ms size=35 referer= handler=/api/v1/provisioning/alert-rules/:UID status_source=server error="cannot delete with provided provenance 'api', needs 'file'"
logger=context userId=10 orgId=1 uname=sa-1-curl t=2025-03-10T14:57:30.913133743Z level=error msg="Request Completed" method=DELETE path=/api/v1/provisioning/alert-rules/aef474us579c0d status=500 remote_addr=10.3.0.228 time_ms=22 duration=22.628633ms size=35 referer= handler=/api/v1/provisioning/alert-rules/:UID status_source=server error="cannot delete with provided provenance 'api', needs 'file'"
In the end I tried to work around it with:
# Delete the ElasticCacheAlerts rule group in folder def0lgfvs3qbkb via
# the provisioning API, sending the X-Disable-Provenance header.
curl --request DELETE \
  --header "X-Disable-Provenance: true" \
  --header "Authorization: Bearer $BARRER_GRAFANA" \
  https://grafana-dev.company.com/api/v1/provisioning/folder/def0lgfvs3qbkb/rule-groups/ElasticCacheAlerts
but that didn't help either (same error as with the rule deletion).
Any idea how to handle updating/deleting provisioned alerts in Grafana? I'm using unified_alerting.
EDIT:
I'm getting the following error during Grafana startup; it looks like it tries to register them again:
"failed to register storage metrics" error="duplicate metrics collector registration attempted"
It is repeated 3 times.