I am using the prometheus-community/kube-prometheus-stack Helm chart for the Grafana–Prometheus stack and the grafana/loki Helm chart in simple scalable mode.
I am trying to use Loki's recording-rule feature to pre-compute the count from a query and store it as a metric. These are my Loki Helm chart values:
# grafana/loki Helm values — simple scalable mode.
# NOTE(review): indentation reconstructed from the flattened paste per the chart
# schema — verify nesting against your real file before applying.
loki:
  auth_enabled: false
  commonConfig:
    replication_factor: 3
    # heartbeat_timeout: 10m
  server:
    http_server_read_timeout: 600s
  limits_config:
    max_query_length: 0h  # 0 disables the maximum-query-length limit
    query_timeout: 600s
  schemaConfig:
    configs:
      # Quoted so YAML keeps the date as a string rather than a timestamp value.
      - from: "2024-09-02"
        store: tsdb
        object_store: s3
        schema: v13
        index:
          prefix: loki_index_
          period: 24h
  storage:
    type: s3
    bucketNames:
      chunks: "chunks"
      # ruler: "ruler"
      admin: "admin"
  storage_config:
    aws:
      s3: s3://eu-central-1
      bucketnames: gis-malz-prod-grafana-loki-chunk-store
      sse:
        type: "SSE-S3"
  ingester:
    chunk_encoding: snappy
    chunk_target_size: 1572864
    max_chunk_age: 1h
    chunk_idle_period: 1h
  tracing:
    enabled: true
  querier:
    # Default is 4; if you have enough memory and CPU you can increase, reduce if OOMing
    max_concurrent: 2
  rulerConfig:
    # Scratch directory for temporary rule files. Fixed: keep it separate from the
    # rule storage directory below, otherwise the ruler can clean up the mounted rules.
    rule_path: /tmp/loki/rules-temp
    evaluation_interval: 1m
    ring:
      kvstore:
        store: "inmemory"  # Store used for coordination of rulers
    storage:
      type: local
      local:
        # The ruler loads rules from per-tenant subdirectories of this path.
        # With auth_enabled=false the tenant ID is "fake", so rule files must
        # end up in /rules/fake (see sidecar.rules.folder below).
        directory: /rules
    enable_api: true
    remote_write:
      enabled: true
      client:
        # url: 'http://prometheus-operated.monitoring.svc.cluster.local/api/v1/write'
        url: 'https://gris.cloud.syngenta.org/prometheus/api/v1/write'
    # rule_files:
    #   - /rules/rules.yaml

persistence:
  enabled: true
  accessModes:
    - ReadWriteOnce
  size: 50Gi

extraObjects:
  # Recording-rule ConfigMap. The "name: loki-recording-rules" label matches
  # sidecar.rules.label/labelValue below so the sidecar ships it to the backend pods.
  # Fixed two typos versus the original: record "job:requets:..." -> "job:requests:..."
  # and group "notificaton_api_internal" -> "notification_api_internal".
  - apiVersion: v1
    kind: ConfigMap
    metadata:
      name: loki-recording-rules
      labels:
        name: loki-recording-rules
    data:
      rules.yaml: |-
        groups:
          - name: geoserver_web_internal
            rules:
              - record: job:requests:geoserver_web_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/geoserver/web" [1m])
          - name: geoserver_apis_internal
            rules:
              - record: job:requests:geoserver_api_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/geoserver/gris" [1m])
          - name: fbm_internal
            rules:
              - record: job:requests:fbm_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/fbm/v1" [1m])
          - name: crop_rotation_internal
            rules:
              - record: job:requests:crop_rotation_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/processes/croprotation" [1m])
          - name: publishing_api_internal
            rules:
              - record: job:requests:publishing_api_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/api" [1m])
          - name: catalog_internal
            rules:
              - record: job:requests:catalog_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/catalog" [1m])
          - name: datafactory_ui_internal
            rules:
              - record: job:requests:datafactory_ui_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/datafactory-ui" [1m])
          - name: datafactory_admin_internal
            rules:
              - record: job:requests:datafactory_admin_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/df-admin" [1m])
          - name: airflow_internal
            rules:
              - record: job:requests:airflow_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/datafactory" [1m])
          - name: notification_api_internal
            rules:
              - record: job:requests:notification_api_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/notification" [1m])
          - name: ecm_internal
            rules:
              - record: job:requests:ecm_internal
                expr: count_over_time({__aws_s3_lb="malz-prod"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) https://gris.cloud.syngenta.org:443/ecm" [1m])
          - name: geoserver_web_external
            rules:
              - record: job:requests:geoserver_web_external
                expr: count_over_time({filename="/var/log/nginx/access.log"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) /geoserver/web" [1m])
          - name: geoserver_apis_external
            rules:
              - record: job:requests:geoserver_api_external
                expr: count_over_time({filename="/var/log/nginx/access.log"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) /geoserver/gris" [1m])
          - name: fbm_external
            rules:
              - record: job:requests:fbm_external
                expr: count_over_time({filename="/var/log/nginx/access.log"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) /fbm/v1" [1m])
          - name: crop_rotation_external
            rules:
              - record: job:requests:crop_rotation_external
                expr: count_over_time({filename="/var/log/nginx/access.log"} | label_format __stream_shard__="", detected_level="" |~ "(GET|POST) /processes/croprotation" [1m])
  # Fixed: removed a stray empty "- |" list item that followed the ConfigMap in the
  # original paste — it produced a meaningless null-ish entry in extraObjects.

gateway:
  ingress:
    enabled: true
    ingressClassName: 'alb'
    annotations:
      kubernetes.io/ingress.class: alb
      alb.ingress.kubernetes.io/load-balancer-name: malz-prod
      alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=120
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:eu-central-1:********:certificate/aeca61e7-7380-4efb-b97d-f93c3bd4b42d"
      alb.ingress.kubernetes.io/ssl-redirect: '443'
      alb.ingress.kubernetes.io/backend-protocol: HTTP
      alb.ingress.kubernetes.io/healthcheck-interval-seconds: "15"
      alb.ingress.kubernetes.io/healthcheck-path: "/loki/ready"
      alb.ingress.kubernetes.io/healthcheck-port: traffic-port
      alb.ingress.kubernetes.io/healthcheck-timeout-seconds: "5"
      alb.ingress.kubernetes.io/healthy-threshold-count: "2"
      alb.ingress.kubernetes.io/unhealthy-threshold-count: "2"
      alb.ingress.kubernetes.io/success-codes: 200,201
      alb.ingress.kubernetes.io/target-group-attributes: deregistration_delay.timeout_seconds=30
      alb.ingress.kubernetes.io/group.name: albintprod
      alb.ingress.kubernetes.io/scheme: internal
      alb.ingress.kubernetes.io/target-type: ip
    hosts:
      - host: gris.cloud.syngenta.org
        paths:
          - path: /loki
            pathType: Prefix

serviceAccount:
  create: true
  name: null
  imagePullSecrets: []
  annotations:
    "eks.amazonaws.com/role-arn": "arn:aws:iam::**********:role/gis-prod-pods-RW-role"
  automountServiceAccountToken: true

backend:
  replicas: 2
  # NOTE(review): in simple scalable mode the ruler runs in the *backend* target —
  # your logs confirm this. Rule delivery to the backend pods is handled by the
  # sidecar below (it writes into sidecar.rules.folder), so no manual mount is
  # required here; the commented block is kept only for reference.
  # extraVolumes:
  #   - name: loki-rules
  #     configMap:
  #       name: loki-recording-rules
  # extraVolumeMounts:
  #   - name: loki-rules
  #     mountPath: /recording-rules

read:
  replicas: 3
  # NOTE(review): the ruler does not run in the read target, so this mount is
  # unused by rule evaluation — consider removing it.
  extraVolumes:
    - name: loki-rules
      configMap:
        name: loki-recording-rules
  extraVolumeMounts:
    - name: loki-rules
      mountPath: /rules

write:
  replicas: 2
  # NOTE(review): the ruler does not run in the write target either — same remark
  # as for the read target above.
  extraVolumes:
    - name: loki-rules
      configMap:
        name: loki-recording-rules
  extraVolumeMounts:
    - name: loki-rules
      mountPath: /rules

sidecar:
  image:
    repository: kiwigrid/k8s-sidecar
    tag: 1.27.5
    sha: ""
    pullPolicy: IfNotPresent
  resources: {}
  securityContext: {}
  skipTlsVerify: false
  enableUniqueFilenames: false
  readinessProbe: {}
  livenessProbe: {}
  rules:
    enabled: true
    label: name
    labelValue: "loki-recording-rules"
    # Fixed: the ruler reads rules from <directory>/<tenant>; with auth disabled the
    # tenant is "fake", so the sidecar must drop rule files into /rules/fake
    # (the chart default), not /rules.
    folder: /rules/fake
    searchNamespace: null
    watchMethod: WATCH
    resource: configmap
    script: null
    watchServerTimeout: 60
    watchClientTimeout: 60
    logLevel: INFO
These are my prometheus-community/kube-prometheus-stack Helm chart values:
# prometheus-community/kube-prometheus-stack Helm values.
# NOTE(review): indentation reconstructed from the flattened paste per the chart
# schema — verify nesting against your real file before applying.
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: true
    configReloaders: true
    general: true
    k8s: true
    kubeApiserverAvailability: true
    kubeApiserverBurnrate: true
    kubeApiserverHistogram: true
    kubeApiserverSlos: true
    kubeControllerManager: true
    kubelet: true
    kubeProxy: true
    kubePrometheusGeneral: true
    kubePrometheusNodeRecording: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeSchedulerAlerting: true
    kubeSchedulerRecording: true
    kubeStateMetrics: true
    network: true
    node: true
    nodeExporterAlerting: true
    nodeExporterRecording: true
    prometheus: true
    prometheusOperator: true
  appNamespacesTarget: ".*"

# NOTE(review): `env` and `extraSecretMounts` are Grafana-chart settings; in
# kube-prometheus-stack they normally live under the `grafana:` key. At the top
# level they are silently ignored — confirm the nesting in the real file.
env:
  AWS_ROLE_ARN: arn:aws:iam::533792813270:role/gis-prod-grafana-pods-role
  AWS_WEB_IDENTITY_TOKEN_FILE: /var/run/secrets/eks.amazonaws.com/serviceaccount/token
  AWS_REGION: eu-central-1
extraSecretMounts:
  # for AWS EKS (cloudwatch) use the following (see also instruction in env: above)
  - name: aws-iam-token
    mountPath: /var/run/secrets/eks.amazonaws.com/serviceaccount
    readOnly: true
    projected:
      defaultMode: 420
      sources:
        - serviceAccountToken:
            audience: sts.amazonaws.com
            expirationSeconds: 86400
            path: token

##-prometheusOperator-##
## Manages Prometheus and Alertmanager components
prometheusOperator:
  enabled: true
  tls:
    enabled: true
    tlsMinVersion: VersionTLS13
    internalPort: 10250
  admissionWebhooks:
    failurePolicy: ""
    timeoutSeconds: 10
    enabled: true
    caBundle: ""
    annotations: {}
    # argocd.argoproj.io/hook: PreSync
    # argocd.argoproj.io/hook-delete-policy: HookSucceeded
    patch:
      enabled: true
      image:
        registry: registry.k8s.io
        repository: ingress-nginx/kube-webhook-certgen
        tag: v20221220-controller-v1.5.1-58-g787ea74b6
        sha: ""
        pullPolicy: IfNotPresent
      resources: {}
      priorityClassName: ""
      annotations: {}
      # argocd.argoproj.io/hook: PreSync
      # argocd.argoproj.io/hook-delete-policy: HookSucceeded
      podAnnotations: {}
      nodeSelector: {}
      affinity: {}
      tolerations: []
      securityContext:
        runAsGroup: 2000
        runAsNonRoot: true
        runAsUser: 2000
        seccompProfile:
          type: RuntimeDefault
    # Security context for create job container
    createSecretJob:
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - ALL
    # Security context for patch job container
    patchWebhookJob:
      securityContext:
        allowPrivilegeEscalation: false
        readOnlyRootFilesystem: true
        capabilities:
          drop:
            - ALL
    # Use certmanager to generate webhook certs
    certManager:
      enabled: false
      rootCert:
        duration: ""
      admissionCert:
        duration: ""
  namespaces: {}
  denyNamespaces: []
  alertmanagerInstanceNamespaces: []
  alertmanagerConfigNamespaces: []
  prometheusInstanceNamespaces: []
  thanosRulerInstanceNamespaces: []
  # clusterDomain: "cluster.local"
  networkPolicy:
    enabled: false
    flavor: kubernetes
  serviceAccount:
    create: true
    name: ""
  service:
    annotations: {}
    labels: {}
    clusterIP: ""
    nodePort: 30080
    nodePortTls: 30443
    additionalPorts: []
    loadBalancerIP: ""
    loadBalancerSourceRanges: []
    externalTrafficPolicy: Cluster
    type: ClusterIP
    externalIPs: []
  labels: {}
  annotations: {}
  podLabels: {}
  podAnnotations: {}
  kubeletService:
    enabled: true
    namespace: kube-system
    name: ""
  serviceMonitor:
    additionalLabels: {}
    interval: ""
    sampleLimit: 0
    targetLimit: 0
    labelLimit: 0
    labelNameLengthLimit: 0
    labelValueLengthLimit: 0
    scrapeTimeout: ""
    selfMonitor: true
    metricRelabelings: []
    relabelings: []
  resources:
    limits:
      cpu: 1000m
      memory: 2000Mi
    requests:
      cpu: 100m
      memory: 100Mi
  hostNetwork: false
  nodeSelector: {}
  tolerations: []
  affinity: {}
  dnsConfig: {}
  securityContext:
    fsGroup: 65534
    runAsGroup: 65534
    runAsNonRoot: true
    runAsUser: 65534
    seccompProfile:
      type: RuntimeDefault
  containerSecurityContext:
    allowPrivilegeEscalation: false
    readOnlyRootFilesystem: true
    capabilities:
      drop:
        - ALL
  ## Prometheus-operator image
  image:
    registry: quay.io
    repository: prometheus-operator/prometheus-operator
    tag: ""
    sha: ""
    pullPolicy: IfNotPresent
  prometheusConfigReloader:
    image:
      registry: quay.io
      repository: prometheus-operator/prometheus-config-reloader
      tag: ""
      sha: ""
    enableProbe: false
    resources:
      requests:
        cpu: 200m
        memory: 50Mi
      limits:
        cpu: 200m
        memory: 50Mi

##-Prometheus-##
prometheus:
  enabled: true
  annotations: {}
  networkPolicy:
    # Fixed: `enabled` must be a boolean — the original value
    # ("failure-domain.beta.kubernetes.io/zone") was a stray paste, not valid here.
    enabled: false
    flavor: kubernetes
  serviceAccount:
    create: true
    name: ""
    annotations: {}
  service:
    annotations: {}
    labels: {}
    clusterIP: ""
    port: 9090
    targetPort: 9090
    externalIPs: []
    nodePort: 30090
    loadBalancerIP: ""
    loadBalancerSourceRanges: []
    externalTrafficPolicy: Cluster
    type: ClusterIP
    additionalPorts: []
    publishNotReadyAddresses: false
    sessionAffinity: ""
  # NOTE(review): in the upstream chart `tlsSecretPerReplica` lives under
  # `prometheus.ingressPerReplica` — confirm where this belongs in the real file.
  tlsSecretPerReplica:
    enabled: false
    prefix: "prometheus"
  ingress:
    ## If true, Prometheus Ingress will be created
    enabled: true
    ## IngressClassName for Prometheus Ingress.
    ## Should be provided if Ingress is enabled.
    ingressClassName: alb
    ## Annotations for Prometheus Ingress (block style instead of flow for readability)
    annotations:
      alb.ingress.kubernetes.io/backend-protocol: "HTTP"
      alb.ingress.kubernetes.io/group.name: "albintprod"
      alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}, {"HTTP":80}]'
      alb.ingress.kubernetes.io/certificate-arn: "arn:aws:acm:eu-central-1:533792813270:certificate/aeca61e7-7380-4efb-b97d-f93c3bd4b42d"
      # alb.ingress.kubernetes.io/ssl-redirect: "443"
      alb.ingress.kubernetes.io/healthcheck-interval-seconds: "15"
      alb.ingress.kubernetes.io/healthcheck-path: "/"
      alb.ingress.kubernetes.io/healthcheck-port: "traffic-port"
      alb.ingress.kubernetes.io/healthcheck-timeout-seconds: "5"
      alb.ingress.kubernetes.io/healthy-threshold-count: "2"
      alb.ingress.kubernetes.io/load-balancer-name: "malz-prod"
      alb.ingress.kubernetes.io/scheme: "internal"
      alb.ingress.kubernetes.io/success-codes: "200,201"
      alb.ingress.kubernetes.io/target-group-attributes: "deregistration_delay.timeout_seconds=30"
      alb.ingress.kubernetes.io/target-type: "ip"
      alb.ingress.kubernetes.io/unhealthy-threshold-count: "2"
      kubernetes.io/ingress.class: "alb"
    ## Hostnames.
    ## Must be provided if Ingress is enabled.
    hosts:
      - gris.cloud.syngenta.org
    ## Path for Prometheus ingress
    paths:
      - /prometheus
  podSecurityPolicy:
    allowedCapabilities: []
    allowedHostPaths: []
    volumes: []
  serviceMonitor:
    interval: ""
    selfMonitor: true
    additionalLabels: {}
    sampleLimit: 0
    targetLimit: 0
    labelLimit: 0
    labelNameLengthLimit: 0
    labelValueLengthLimit: 0
    scheme: ""
    tlsConfig: {}
    bearerTokenFile:
    metricRelabelings: []
    relabelings: []
  prometheusSpec:
    disableCompaction: false
    apiserverConfig: {}
    additionalArgs: []
    scrapeInterval: ""
    scrapeTimeout: ""
    evaluationInterval: ""
    listenLocal: false
    enableAdminAPI: false
    version: ""
    web: {}
    exemplars: ""
    enableFeatures: []
    image:
      registry: quay.io
      repository: prometheus/prometheus
      tag: v2.44.0
      sha: ""
    tolerations: []
    topologySpreadConstraints: []
    alertingEndpoints: []
    externalLabels: {}
    # Required for Loki's ruler remote_write to be accepted (enables the
    # /api/v1/write receiver endpoint).
    enableRemoteWriteReceiver: true
    replicaExternalLabelName: ""
    replicaExternalLabelNameClear: false
    prometheusExternalLabelName: ""
    prometheusExternalLabelNameClear: false
    # Fixed: externalUrl is a URL and should carry the scheme.
    externalUrl: "https://gris.cloud.syngenta.org/prometheus"
    nodeSelector: {}
    secrets: []
    configMaps: []
    query: {}
    ruleNamespaceSelector: {}
    ruleSelectorNilUsesHelmValues: false
    ruleSelector: {}
    serviceMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelector: {}
    serviceMonitorNamespaceSelector: {}
    podMonitorSelectorNilUsesHelmValues: false
    podMonitorSelector: {}
    podMonitorNamespaceSelector: {}
    probeSelectorNilUsesHelmValues: false
    probeSelector: {}
    probeNamespaceSelector: {}
    scrapeConfigSelectorNilUsesHelmValues: false
    scrapeConfigSelector: {}
    scrapeConfigNamespaceSelector: {}
    retention: 10d
    retentionSize: ""
    tsdb:
      outOfOrderTimeWindow: 0s
    walCompression: true
    paused: false
    replicas: 1
    shards: 1
    logLevel: info
    logFormat: logfmt
    # NOTE(review): when Prometheus is served under /prometheus without path
    # rewriting at the ALB, routePrefix typically needs to be "/prometheus" —
    # confirm against your load-balancer setup.
    routePrefix: /
    podMetadata: {}
    podAntiAffinity: ""
    podAntiAffinityTopologyKey: kubernetes.io/hostname
    affinity: {}
    remoteRead: []
    # - url: http://remote1/read
    ## additionalRemoteRead is appended to remoteRead
    additionalRemoteRead: []
    remoteWrite: []
    # - url: http://remote1/push
    ## additionalRemoteWrite is appended to remoteWrite
    additionalRemoteWrite: []
    remoteWriteDashboards: false
    resources:
      requests:
        memory: 400Mi
    storageSpec: {}
    volumes: []
    volumeMounts: []
    # additionalScrapeConfigs: |
    #   - job_name: 'loki'
    #     static_configs:
    #       # - targets: ['http://loki-gateway.monitoring.svc.cluster.local']
    #       - targets: ['gris.cloud.syngenta.org/loki']
    additionalScrapeConfigsSecret: {}
    additionalPrometheusSecretsAnnotations: {}
    additionalAlertManagerConfigs: []
    additionalAlertManagerConfigsSecret: {}
    additionalAlertRelabelConfigs: []
    additionalAlertRelabelConfigsSecret: {}
    securityContext:
      runAsGroup: 2000
      runAsNonRoot: true
      runAsUser: 1000
      fsGroup: 2000
      seccompProfile:
        type: RuntimeDefault
    priorityClassName: ""
    thanos: {}
    containers: []
    initContainers: []
    portName: "web"
    arbitraryFSAccessThroughSMs: false
    overrideHonorLabels: false
    overrideHonorTimestamps: false
    ignoreNamespaceSelectors: false
    enforcedNamespaceLabel: ""
    prometheusRulesExcludedFromEnforce: []
    excludedFromEnforcement: []
    queryLogFile: false
    enforcedSampleLimit: false
    enforcedTargetLimit: false
    enforcedLabelLimit: false
    enforcedLabelNameLengthLimit: false
    enforcedLabelValueLengthLimit: false
    allowOverlappingBlocks: false
    minReadySeconds: 0
    hostNetwork: false
    hostAliases: []
  additionalRulesForClusterRole: []
  additionalServiceMonitors: []
  additionalPodMonitors: []

thanosRuler:
  enabled: false

cleanPrometheusOperatorObjectNames: false

extraManifests: []
I don't see any errors related to the ruler in the loki-backend pods, and I can see from the backend pod logs that the ruler component is up.
Will this metric be available to view in Loki? Can Prometheus scrape these metrics from Loki?
I have tried Loki's remote_write option to push the metric to Prometheus, but the new metric does not show up in Prometheus.