Hello,
I have Loki running on a global cluster, and it works fine.
We need to evaluate rules on different dev/prod clusters (the rules are auto-generated on these clusters depending on what is deployed there).
I expected that I could deploy the Loki ruler on these clusters and set the global Loki as the target for its queries.
I am using the loki-distributed Helm chart.
Config looks like:
# Values for the loki-distributed Helm chart.
# Only the query-frontend and the ruler run with a non-zero replica count here.
fullnameOverride: "loki"

serviceMonitor:
  enabled: true
  labels:
    prometheus: main

prometheusRule:
  enabled: false

compactor:
  enabled: false

# Ingest and query components are scaled to zero in this deployment.
ingester:
  replicas: 0

distributor:
  replicas: 0

gateway:
  replicas: 0

querier:
  replicas: 0

queryFrontend:
  replicas: 1
  resources:
    requests:
      cpu: 50m
      memory: 128Mi
    limits:
      cpu: "1"
      memory: 1G
  extraEnv:
    # Points at the key file mounted from the secret volume below.
    - name: GOOGLE_APPLICATION_CREDENTIALS
      value: /etc/secrets/key.json
  extraVolumeMounts:
    - name: loki-gcs-secret
      mountPath: /etc/secrets
  extraVolumes:
    - name: loki-gcs-secret
      secret:
        secretName: loki-mr-secret
  extraArgs:
    - --log.format=json
    - --log.level=info

tableManager:
  enabled: false

ruler:
  replicas: 1
  enabled: true
  resources:
    requests:
      cpu: "1.5"
      memory: 3Gi
    limits:
      cpu: "7"
      memory: 6Gi
  persistence:
    enabled: true
    size: 10Gi
  extraEnv:
    # Points at the key file mounted from the loki-mr-secret volume below.
    - name: GOOGLE_APPLICATION_CREDENTIALS
      value: /etc/secrets/key.json
  extraVolumes:
    - name: loki-operator-rules
      configMap:
        name: loki-operator-rules
        defaultMode: 420
    - name: rules-tmp
      emptyDir: {}
    - name: loki-mr-secret
      secret:
        secretName: loki-mr-secret
  extraVolumeMounts:
    # Rule files from the ConfigMap land in a sub-directory of the ruler's
    # local storage directory (/var/loki/rules, configured further down).
    - name: loki-operator-rules
      mountPath: /var/loki/rules/cit1-k8s
    - name: rules-tmp
      mountPath: /tmp/loki/rules
    - name: loki-mr-secret
      mountPath: /etc/secrets
  extraArgs:
    - --log.format=json
    - --log.level=info

indexGateway:
  enabled: false

memcachedExporter:
  enabled: false

memcachedChunks:
  enabled: false

memcachedFrontend:
  enabled: false

memcachedIndexQueries:
  enabled: false

memcachedIndexWrites:
  enabled: false

networkPolicy:
  enabled: false

loki:
  schemaConfig:
    configs:
      - from: 2021-06-05
        store: boltdb-shipper
        object_store: gcs
        schema: v11
        index:
          prefix: loki_index_
          period: 24h
  storageConfig:
    gcs:
      bucket_name: something-loki-multiregional
    boltdb_shipper:
      active_index_directory: /var/loki/index
      shared_store: gcs
      cache_location: /var/loki/cache
  structuredConfig:
    auth_enabled: true
    common:
      replication_factor: 1
    frontend:
      max_outstanding_per_tenant: 4096
      downstream_url: https://loki-gateway.int.something.live
    ruler:
      storage:
        local:
          directory: /var/loki/rules
      rule_path: /tmp/loki/rules
      enable_api: true
      alertmanager_url: http://kube-prometheus-stack-alertmanager.monitoring.svc.cluster.local:9093
      enable_alertmanager_v2: true
      wal:
        dir: /var/loki/wal
      remote_write:
        enabled: false
      # Rule queries are meant to be sent to a remote query-frontend
      # instead of being evaluated inside the ruler process.
      evaluation:
        mode: remote
        query_frontend:
          # NOTE(review): port 443 suggests a TLS-terminated endpoint; confirm
          # whether the ruler's gRPC client needs TLS enabled for this address.
          address: loki-query-frontend-grpc.global.int.something.live:443
Logs are:
2024-03-20 20:10:36.517
{
"caller": "compat.go:78",
"err": "empty ring",
"level": "error",
"msg": "rule evaluation failed",
"query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
"query_hash": 483524370,
"rule_name": "Service Error Detected",
"rule_type": "alerting",
"ts": "2024-03-20T18:10:36.498081589Z",
"user": "cit1-k8s"
}
2024-03-20 20:10:36.517
{
"cache_chunk_bytes_fetched": 0,
"cache_chunk_bytes_stored": 0,
"cache_chunk_download_time": "0s",
"cache_chunk_hit": 0,
"cache_chunk_req": 0,
"cache_index_download_time": "0s",
"cache_index_hit": 0,
"cache_index_req": 0,
"cache_result_download_time": "0s",
"cache_result_hit": 0,
"cache_result_req": 0,
"cache_stats_results_download_time": "0s",
"cache_stats_results_hit": 0,
"cache_stats_results_req": 0,
"caller": "metrics.go:159",
"component": "ruler",
"duration": "478.345µs",
"end_delta": "1.623118ms",
"evaluation_mode": "local",
"latency": "fast",
"length": "0s",
"level": "info",
"limit": 0,
"lines_per_second": 0,
"org_id": "cit1-k8s",
"post_filter_lines": 0,
"query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
"query_hash": 483524370,
"query_type": "metric",
"queue_time": "0s",
"range_type": "instant",
"returned_lines": 0,
"shards": 0,
"splits": 0,
"start_delta": "1.622884ms",
"status": "500",
"step": "0s",
"store_chunks_download_time": "0s",
"throughput": "0B",
"total_bytes": "0B",
"total_bytes_structured_metadata": "0B",
"total_entries": 0,
"total_lines": 0,
"traceID": "434eb7749cb8c56e",
"ts": "2024-03-20T18:10:36.497961389Z"
}
2024-03-20 20:10:36.517
{
"caller": "engine.go:232",
"component": "ruler",
"evaluation_mode": "local",
"level": "info",
"msg": "executing query",
"org_id": "cit1-k8s",
"query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
"query_hash": 483524370,
"traceID": "434eb7749cb8c56e",
"ts": "2024-03-20T18:10:36.497309039Z",
"type": "instant"
}
2024-03-20 20:10:36.517
{
"caller": "compat.go:66",
"level": "info",
"msg": "evaluating rule",
"query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
"query_hash": 483524370,
"rule_name": "Service Error Detected",
"rule_type": "alerting",
"ts": "2024-03-20T18:10:36.49703202Z",
"user": "cit1-k8s"
}
2024-03-20 20:10:35.716
{
"caller": "manager.go:995",
"level": "info",
"msg": "Starting rule manager...",
"ts": "2024-03-20T18:10:35.686295555Z",
"user": "cit1-k8s"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_client.go:595",
"err": "1 error occurred:\n\t* Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host\n\n",
"level": "warn",
"msg": "joining memberlist cluster: failed to reach any nodes",
"retries": 0,
"ts": "2024-03-20T18:10:35.650144918Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_logger.go:74",
"level": "warn",
"msg": "Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host",
"ts": "2024-03-20T18:10:35.650080664Z"
}
2024-03-20 20:10:35.716
{
"caller": "loki.go:505",
"level": "info",
"msg": "Loki started",
"ts": "2024-03-20T18:10:35.647333951Z"
}
2024-03-20 20:10:35.716
{
"caller": "ruler.go:528",
"level": "info",
"msg": "ruler up and running",
"ts": "2024-03-20T18:10:35.647260413Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "ruler",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.647129633Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "store",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.646987931Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "ingester-querier",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.646917882Z"
}
2024-03-20 20:10:35.716
{
"caller": "ring.go:273",
"level": "info",
"msg": "ring doesn't exist in KV store yet",
"ts": "2024-03-20T18:10:35.646746334Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_client.go:573",
"join_members": "loki-memberlist",
"level": "info",
"msg": "joining memberlist cluster",
"ts": "2024-03-20T18:10:35.646674014Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_client.go:560",
"elapsed_time": "19.942534ms",
"joined_nodes": 0,
"level": "warn",
"msg": "memberlist fast-join finished",
"ts": "2024-03-20T18:10:35.646641319Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_logger.go:74",
"level": "warn",
"msg": "Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host",
"ts": "2024-03-20T18:10:35.646580392Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "ring",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.635205214Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "memberlist-kv",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.635054292Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "analytics",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.634904523Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "server",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.63481962Z"
}
2024-03-20 20:10:35.716
{
"caller": "module_service.go:82",
"level": "info",
"module": "runtime-config",
"msg": "initialising",
"ts": "2024-03-20T18:10:35.63478843Z"
}
2024-03-20 20:10:35.716
{
"caller": "mapper.go:47",
"level": "info",
"msg": "cleaning up mapped rules directory",
"path": "/tmp/loki/rules",
"ts": "2024-03-20T18:10:35.629398402Z"
}
2024-03-20 20:10:35.716
{
"caller": "shipper_index_client.go:76",
"index-store": "boltdb-shipper-2021-06-05",
"level": "info",
"msg": "starting boltdb shipper in RO mode",
"ts": "2024-03-20T18:10:35.628835924Z"
}
2024-03-20 20:10:35.716
{
"caller": "shipper.go:165",
"index-store": "boltdb-shipper-2021-06-05",
"level": "info",
"msg": "starting index shipper in RO mode",
"ts": "2024-03-20T18:10:35.628787761Z"
}
2024-03-20 20:10:35.716
{
"caller": "table_manager.go:271",
"distinct_users": "",
"distinct_users_len": 0,
"duration": "4.298µs",
"index-store": "boltdb-shipper-2021-06-05",
"level": "info",
"msg": "query readiness setup completed",
"ts": "2024-03-20T18:10:35.628700534Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_client.go:540",
"level": "info",
"msg": "memberlist fast-join starting",
"nodes_found": 1,
"to_join": 4,
"ts": "2024-03-20T18:10:35.626702642Z"
}
2024-03-20 20:10:35.716
{
"caller": "experimental.go:20",
"feature": "In-memory (FIFO) cache - chunksembedded-cache",
"level": "warn",
"msg": "experimental feature in use",
"ts": "2024-03-20T18:10:35.626084665Z"
}
2024-03-20 20:10:35.716
{
"caller": "cache.go:127",
"level": "warn",
"msg": "fifocache config is deprecated. use embedded-cache instead",
"ts": "2024-03-20T18:10:35.626023544Z"
}
2024-03-20 20:10:35.716
{
"caller": "memberlist_client.go:434",
"cluster_label": "",
"level": "info",
"msg": "Using memberlist cluster label and node name",
"node": "loki-ruler-5dbf96698-b7w9c-1ae6ca0e",
"ts": "2024-03-20T18:10:35.625559326Z"
}
2024-03-20 20:10:35.716
{
"caller": "server.go:322",
"grpc": "[::]:9095",
"http": "[::]:3100",
"level": "info",
"msg": "server listening on addresses",
"ts": "2024-03-20T18:10:35.624227Z"
}
2024-03-20 20:10:35.716
{
"caller": "main.go:108",
"level": "info",
"msg": "Starting Loki",
"ts": "2024-03-20T18:10:35.620615784Z",
"version": "(version=2.9.2, branch=HEAD, revision=a17308db6)"
}
2024-03-20 20:10:35.716
{
"caller": "loki.go:288",
"level": "warn",
"msg": "global timeout not configured, using default engine timeout (\"5m0s\"). This behavior will change in the next major to always use the default global timeout (\"5m\").",
"ts": "2024-03-20T18:10:35.617459845Z"
}
I have deployed the query-frontend component just for testing. The idea is not to use it.
Could you point out why I am getting the "empty ring" error (I am not expecting any ring with just one component) and what I am doing wrong in general?
(using latest loki)
When I use the query-frontend within the same deployment:
address: loki-query-frontend.logging.svc:9095
everything looks fine.