I am experiencing an issue with the Loki query frontend in our setup. Specifically, when I enable `query_scheduler.use_scheduler_ring` and set the store to `memberlist`, the `loki-query-frontend` pods start receiving unexpected ping messages from other components. I’m trying to understand why this happens and how to resolve it.
Observations:
- When I enable `query_scheduler.use_scheduler_ring`, the following warning logs appear in the `loki-query-frontend` pod:
Got ping for unexpected node 'loki-query-frontend--dg8cm-7de9dfbf' from=10.x.47.182:7946
Got ping for unexpected node 'loki-query-frontend--dg8cm-7de9dfbf' from=10.x.170.252:7946
Got ping for unexpected node 'loki-query-frontend--dg8cm-7de9dfbf' from=10.x.29.45:7946
- The ping requests come from various other Loki pods, but the receiving pod (`loki-query-frontend-66745846dc-dg8cm-7de9dfbf`) is not expecting them.
Helm Chart and App Version:
- Helm Chart Version: loki-6.24.0
- App Version: 3.3.2
- Deployment Mode: Distributed Deployment
Config:
# Loki configuration for the distributed deployment.
# NOTE(review): the nesting below was reconstructed from a paste that had lost
# all indentation; keys and values are unchanged. Verify the hierarchy against
# the Loki configuration reference before applying it.
auth_enabled: true

bloom_build:
  builder:
    planner_address: loki-bloom-planner-headless.loki.svc.cluster.local:9095
  enabled: false

bloom_gateway:
  client:
    addresses: dnssrvnoa+_grpc._tcp.loki-bloom-gateway-headless.loki.svc.cluster.local
  enabled: false

chunk_store_config:
  chunk_cache_config:
    background:
      writeback_buffer: 500000
      writeback_goroutines: 1
      writeback_size_limit: 500MB
    default_validity: 0s
    memcached:
      batch_size: 256
      parallelism: 10
    memcached_client:
      addresses: dnssrvnoa+_memcached-client._tcp.loki-chunks-cache.loki.svc
      consistent_hash: true
      max_idle_conns: 72
      timeout: 2000ms

common:
  compactor_address: 'http://loki-compactor:3100'
  path_prefix: /var/loki
  replication_factor: 3
  # The common ring, the compactor ring and the index-gateway ring in this
  # file all select the memberlist kvstore.
  ring:
    kvstore:
      store: memberlist
  storage:
    s3:
      access_key_id: ${ACCESS_KEY_ID}
      bucketnames: xx
      endpoint: xx.com
      insecure: false
      s3forcepathstyle: true
      secret_access_key: ${ACCESS_KEY_SECRET}

compactor:
  compaction_interval: 10m
  compactor_ring:
    kvstore:
      store: memberlist
  delete_request_store: s3
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
  retention_enabled: true

frontend:
  grpc_client_config:
    grpc_compression: snappy
  querier_forget_delay: 1m
  # NOTE(review): a fixed scheduler address is configured here and again under
  # frontend_worker; confirm it is still intended once the scheduler ring is
  # enabled.
  scheduler_address: loki-query-scheduler.loki.svc.cluster.local:9095
  tail_proxy_url: http://loki-querier.loki.svc.cluster.local:3100

frontend_worker:
  grpc_client_config:
    grpc_compression: snappy
  scheduler_address: loki-query-scheduler.loki.svc.cluster.local:9095

index_gateway:
  mode: ring
  ring:
    kvstore:
      store: memberlist

ingester:
  chunk_encoding: snappy
  chunk_idle_period: 30m
  chunk_target_size: 5242880
  max_chunk_age: 2h
  wal:
    flush_on_shutdown: true

ingester_client:
  grpc_client_config:
    grpc_compression: snappy

limits_config:
  allow_structured_metadata: true
  max_cache_freshness_per_query: 10m
  max_query_capacity: 0.5
  max_query_lookback: 72h
  query_timeout: 300s
  reject_old_samples: true
  reject_old_samples_max_age: 168h
  retention_period: 192h
  split_queries_by_interval: 15m
  volume_enabled: true

memberlist:
  join_members:
    - loki-memberlist

pattern_ingester:
  enabled: false

querier:
  max_concurrent: 16

query_range:
  align_queries_with_step: true
  cache_results: true
  results_cache:
    cache:
      background:
        writeback_buffer: 500000
        writeback_goroutines: 1
        writeback_size_limit: 500MB
      default_validity: 12h
      memcached_client:
        addresses: dnssrvnoa+_memcached-client._tcp.loki-results-cache.loki.svc
        consistent_hash: true
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m

# NOTE(review): use_scheduler_ring does not appear in this dump even though the
# report says it is enabled; confirm where that setting is applied.
query_scheduler:
  grpc_client_config:
    grpc_compression: snappy
  querier_forget_delay: 1m

ruler:
  enable_sharding: true
  evaluation:
    mode: remote
    query_frontend:
      address: dns+loki-query-frontend.loki.svc.cluster.local:9095
  sharding_algo: by-rule
  storage:
    s3:
      access_key_id: ${ACCESS_KEY_ID}
      bucketnames: xx
      endpoint: xx
      insecure: false
      s3forcepathstyle: true
      secret_access_key: ${ACCESS_KEY_SECRET}
    type: s3
  wal:
    dir: /var/loki/ruler-wal

runtime_config:
  file: /etc/loki/runtime-config/runtime-config.yaml

schema_config:
  configs:
    - from: "2024-08-16"
      index:
        period: 24h
        prefix: loki_index_
      object_store: s3
      schema: v13
      store: tsdb

server:
  grpc_listen_port: 9095
  http_listen_port: 3100
  http_server_read_timeout: 600s
  http_server_write_timeout: 600s

storage_config:
  bloom_shipper:
    working_directory: /var/loki/data/bloomshipper
  boltdb_shipper:
    index_gateway_client:
      server_address: dns+loki-index-gateway-headless.loki.svc.cluster.local:9095
  hedging:
    at: 250ms
    max_per_second: 20
    up_to: 3
  tsdb_shipper:
    active_index_directory: /var/loki/index
    cache_location: /var/loki/cache
    cache_ttl: 168h
    index_gateway_client:
      server_address: dns+loki-index-gateway-headless.loki.svc.cluster.local:9095

tracing:
  enabled: false