Greetings
We’ve been using Loki for quite a while and are currently running version 2.8.6. We received some feedback about Loki’s performance, especially about “occasional gaps in logs”. I found an issue on GitHub that describes exactly what we are experiencing, and many people there recommended rolling back to version 2.8.4, reporting that the issue does not reproduce on that version.
Well, we did it, since I thought going back two patch versions was not a big deal, but it turned out that it actually is. Our data in the object store from the time before the rollback is now inaccessible; Loki reports the following error for it:
failed to get s3 object: NoSuchKey: The specified key does not exist. status code: 404
Has anyone seen the same behaviour, and how did you manage it? All the data is still in place in the object store, but somehow Loki is unable to access it.
Our current setup is the scalable two-target mode (reader + writer) running in ECS Fargate.
Here is the config file:
---
# General config
# Top-level switches that do not belong to any component section.
# NOTE(review): the ${VAR:-default} placeholders used throughout this file
# presumably rely on Loki's -config.expand-env=true flag — confirm it is set.
auth_enabled: ${AUTH_ENABLED:-true}
shutdown_delay: 30s
##################
# Server config
# HTTP and gRPC listener settings. Both listeners bind to all interfaces;
# the ports can be overridden through HTTP_LISTEN_PORT / GRPC_LISTEN_PORT.
# Nesting restored: in the pasted form every key below sat at column 0 and
# would have been parsed as a top-level key instead of a child of `server`.
server:
  http_listen_address: 0.0.0.0
  http_listen_port: ${HTTP_LISTEN_PORT:-3100}
  http_server_idle_timeout: 120s
  http_server_read_timeout: 3m
  http_server_write_timeout: 3m
  grpc_listen_address: 0.0.0.0
  grpc_listen_port: ${GRPC_LISTEN_PORT:-9096}
  # 20971520 bytes = 20 MiB for both gRPC message directions.
  grpc_server_max_recv_msg_size: 20971520
  grpc_server_max_send_msg_size: 20971520
  grpc_server_max_connection_idle: 120s
  grpc_server_max_connection_age: 1h
  grpc_server_max_connection_age_grace: 120s
  grpc_server_keepalive_time: 10m
  graceful_shutdown_timeout: 10s
  log_level: "${LOG_LEVEL:-info}"
  log_source_ips_enabled: true
##################
# Distributor config
# Ring membership (memberlist-backed) and stream-rate polling settings for
# the distributor. Nesting restored from the flattened paste: `kvstore`,
# the heartbeat settings and the interface list belong to `ring`; the three
# rate settings belong to `rate_store`.
distributor:
  ring:
    kvstore:
      store: memberlist
    heartbeat_period: 5s
    heartbeat_timeout: 30s
    instance_interface_names:
      - eth1
  rate_store:
    max_request_parallelism: 200
    stream_rate_update_interval: 1s
    ingester_request_timeout: 1s
##################
# Querier config
# Query execution settings. Nesting restored from the flattened paste:
# `timeout` and `max_look_back_period` are children of `engine`.
querier:
  max_concurrent: 8
  # Raised from 5m to 3h. The ingester section of this file keeps chunks in
  # memory for up to max_chunk_age (2h); with a 5m window the querier stops
  # asking ingesters for anything older than 5 minutes, so data that has not
  # been flushed to S3 yet is invisible to queries ("gaps in logs").
  # This value must stay larger than ingester.max_chunk_age.
  query_ingesters_within: 3h
  engine:
    # NOTE(review): limits_config.query_timeout (180s) is also set in this
    # file — confirm which of the two timeouts is intended to win.
    timeout: 5m0s
    max_look_back_period: 1m
  query_store_only: false
  multi_tenant_queries_enabled: true
##################
# Query-scheduler config
# Per-tenant queue depth and querier bookkeeping for the scheduler. The
# scheduler ring is disabled; frontends and workers in this file reach the
# scheduler through an explicit scheduler_address instead.
# Nesting restored: the keys below are children of `query_scheduler`.
query_scheduler:
  max_outstanding_requests_per_tenant: 100
  querier_forget_delay: 0s
  use_scheduler_ring: false
##################
# Frontend config
# Query-frontend settings: the address/port it advertises and the scheduler
# it connects to (the reader service on the gRPC port).
# Nesting restored: all keys are children of `frontend`, and `- eth1` is the
# single item of `instance_interface_names`.
frontend:
  address: ${LOKI_BIND_ADDR:-0.0.0.0}
  port: ${GRPC_LISTEN_PORT:-9096}
  scheduler_address: ${LOKI_READER_DNS:-loki-reader.cluster.internal}:${GRPC_LISTEN_PORT:-9096}
  scheduler_dns_lookup_period: 10s
  scheduler_worker_concurrency: 5
  instance_interface_names:
    - eth1
  log_queries_longer_than: 0
  query_stats_enabled: false
  graceful_shutdown_timeout: 10s
  compress_responses: true
##################
# Query-range config
# Query splitting/alignment and the results cache, which is stored in
# memcached with snappy compression.
# Nesting restored: `compression` and `cache` belong to `results_cache`;
# `memcached` and `memcached_client` belong to `cache`.
query_range:
  align_queries_with_step: true
  cache_results: true
  max_retries: 5
  results_cache:
    compression: snappy
    cache:
      default_validity: 12h
      memcached:
        expiration: 600s
        batch_size: 512
      memcached_client:
        addresses: ${MEMCACHED_SERVER_URL:-localhost:11211}
        timeout: 500ms
##################
# Ruler config
# Rule evaluation and alert delivery. The ruler API and remote-write are
# both switched off; rules are stored in the same S3 bucket/region variables
# used by the common storage section.
# Nesting restored: `dir` belongs to `wal`; `type` and `s3` belong to
# `storage`; `enabled`, `clients` and `config_refresh_period` belong to
# `remote_write`.
ruler:
  enable_api: false
  # NOTE(review): the default has no http:// scheme — confirm the ruler
  # accepts a bare host:port here.
  alertmanager_url: ${VM_ALERTMANAGER_URL:-alertmanager.cluster.internal:9999}
  enable_alertmanager_discovery: false
  alertmanager_refresh_interval: 1m0s
  enable_alertmanager_v2: false
  notification_queue_capacity: 10000
  notification_timeout: 10s
  for_outage_tolerance: 1h0m0s
  for_grace_period: 10m0s
  resend_delay: 1m0s
  wal:
    dir: /tmp/ruler-wal
  storage:
    type: s3
    s3:
      bucketnames: ${S3_BUCKET_NAME:-loki-store}
      region: ${S3_BUCKET_REGION:-eu-central-1}
  rule_path: /tmp/prom-rules
  remote_write:
    enabled: false
    clients:
      vm:
        url: ${VM_RW_URL:-vm.cluster.internal:9999}
    config_refresh_period: 10s
##################
# Ingester client config
# Connection-pool housekeeping and request timeout for calls to ingesters.
# Nesting restored: the flattened paste contained `remote_timeout` twice at
# the same level (a duplicate key). The first belongs to `pool_config`, the
# second to `ingester_client` itself.
ingester_client:
  pool_config:
    client_cleanup_period: 15s
    health_check_ingesters: true
    remote_timeout: 5s
  remote_timeout: 5s
##################
# Ingester config
# Ring lifecycle, chunk sizing/flushing and write-ahead-log settings for the
# ingesters (writer target).
# Nesting restored from the flattened paste. `heartbeat_timeout` appeared
# twice at the same level (a duplicate key): the first occurrence, next to
# `kvstore` and `replication_factor`, belongs to `lifecycler.ring`; the
# second, next to `heartbeat_period`, belongs to `lifecycler`.
ingester:
  lifecycler:
    ring:
      kvstore:
        store: memberlist
      heartbeat_timeout: 10s
      replication_factor: 3
    heartbeat_period: 5s
    heartbeat_timeout: 10s
    join_after: 0s
    min_ready_duration: 15s
    final_sleep: 3s
    observe_period: 5s
    unregister_on_shutdown: true
    readiness_check_ring_health: false
    address: ${LOKI_BIND_ADDR:-0.0.0.0}
    port: ${GRPC_LISTEN_PORT:-9096}
  max_transfer_retries: 0
  concurrent_flushes: 32
  flush_check_period: 10s
  flush_op_timeout: 1m
  chunk_retain_period: 1m
  chunk_idle_period: 5m
  # 262144 bytes = 256 KiB per block.
  chunk_block_size: 262144
  # 15728640 bytes = 15 MiB target chunk size.
  chunk_target_size: 15728640
  chunk_encoding: snappy
  # querier.query_ingesters_within must be larger than this value, otherwise
  # chunks still held in memory are not queried.
  max_chunk_age: 2h
  autoforget_unhealthy: true
  max_returned_stream_errors: 10
  query_store_max_look_back_period: 0s
  wal:
    enabled: true
    # NOTE(review): /tmp on ECS Fargate is presumably ephemeral task storage,
    # so the WAL does not survive a task replacement — confirm this is
    # acceptable.
    dir: /tmp/ingester-wal
    checkpoint_duration: 1m
    flush_on_shutdown: true
    replay_memory_ceiling: 50MB
  index_shards: 32
  max_dropped_streams: 10
##################
# Index gateway config
# Runs the index gateway in ring mode with a memberlist-backed ring.
# Nesting restored: everything after `mode` is a child of `ring`, and
# `- eth1` is the single item of `instance_interface_names`.
index_gateway:
  mode: ring
  ring:
    kvstore:
      store: memberlist
    heartbeat_timeout: 10s
    instance_interface_names:
      - eth1
    instance_addr: ${LOKI_BIND_ADDR:-0.0.0.0}
    instance_port: ${GRPC_LISTEN_PORT:-9096}
    replication_factor: 3
##################
# Storage config
# S3 client tuning, the index-query cache (memcached) and the two index
# shippers that match the two periods in schema_config: boltdb-shipper for
# the older period and TSDB for the newer one.
# Nesting restored from the flattened paste: `http_config`,
# `signature_version` and `storage_class` belong to `aws`; each shipper owns
# its own directories, prefix and `index_gateway_client`.
storage_config:
  aws:
    http_config:
      idle_conn_timeout: 3m
      response_header_timeout: 10s
    signature_version: v4
    storage_class: STANDARD
  index_cache_validity: 5m0s
  index_queries_cache_config:
    default_validity: 12h
    memcached:
      expiration: 600s
      batch_size: 512
    memcached_client:
      addresses: ${MEMCACHED_SERVER_URL:-localhost:11211}
      timeout: 500ms
  disable_broad_index_queries: false
  max_parallel_get_chunk: 150
  max_chunk_batch_size: 50
  boltdb_shipper:
    active_index_directory: /tmp/boltdb-shipper-active
    shared_store: s3
    # Index objects for the boltdb-shipper period are stored under this key
    # prefix in the bucket.
    shared_store_key_prefix: index/
    cache_location: /tmp/boltdb-shipper-cache
    cache_ttl: 1h
    resync_interval: 5m
    index_gateway_client:
      server_address: ${LOKI_READER_DNS:-loki-reader.cluster.internal}:${GRPC_LISTEN_PORT:-9096}
    build_per_tenant_index: true
  tsdb_shipper:
    active_index_directory: /tmp/tsdb-index
    shared_store: s3
    # Index objects for the TSDB period use a different key prefix from the
    # boltdb-shipper period above.
    shared_store_key_prefix: tsdb-index/
    cache_location: /tmp/tsdb-cache
    cache_ttl: 1h
    resync_interval: 5m
    index_gateway_client:
      server_address: ${LOKI_READER_DNS:-loki-reader.cluster.internal}:${GRPC_LISTEN_PORT:-9096}
##################
# Chunk store config
# Memcached-backed caches for chunks and for write de-duplication. Both use
# the same memcached endpoint and identical expiry/batch settings.
# Nesting restored: each `*_cache_config` owns its own `default_validity`,
# `memcached` and `memcached_client` children; `cache_lookups_older_than`
# is a direct child of `chunk_store_config`.
chunk_store_config:
  chunk_cache_config:
    default_validity: 12h
    memcached:
      expiration: 600s
      batch_size: 512
    memcached_client:
      addresses: ${MEMCACHED_SERVER_URL:-localhost:11211}
      timeout: 500ms
  write_dedupe_cache_config:
    default_validity: 12h
    memcached:
      expiration: 600s
      batch_size: 512
    memcached_client:
      addresses: ${MEMCACHED_SERVER_URL:-localhost:11211}
      timeout: 500ms
  cache_lookups_older_than: 1s
##################
# Period schema config
# Two index periods, both schema v12 on S3 with 24h tables and identical
# index/chunk prefixes: boltdb-shipper from 2021-01-01, TSDB from 2024-09-20.
# Nesting restored: each `- from:` starts one entry of `configs`, and
# `prefix`/`period` belong to that entry's `index` and `chunks` maps.
# The first `from` date is now quoted like the second one, so both are
# plain strings rather than one of them being an implicitly typed date.
# Do not edit the dates or prefixes of a period that already holds data:
# they determine where existing index tables are looked up.
schema_config:
  configs:
    - from: "2021-01-01"
      store: boltdb-shipper
      object_store: s3
      schema: v12
      index:
        prefix: loki_index_
        period: 24h
      chunks:
        prefix: loki_chunks_
        period: 24h
      row_shards: 16
    # New TSDB schema below
    - from: "2024-09-20"
      store: tsdb
      object_store: s3
      schema: v12
      index:
        prefix: loki_index_
        period: 24h
      chunks:
        prefix: loki_chunks_
        period: 24h
      row_shards: 16
##################
# Compactor config
# Index compaction schedule, (disabled) retention, delete-request handling
# and the compactor's ring identity.
# Nesting restored: `instance_id`, the interface list, `instance_addr` and
# `instance_port` are children of `compactor_ring`.
compactor:
  working_directory: /tmp/compactor
  shared_store: s3
  # NOTE(review): this prefix differs from both shipper prefixes in
  # storage_config (`index/` for boltdb_shipper, `tsdb-index/` for
  # tsdb_shipper). The compactor presumably looks for index tables under its
  # own prefix, so it may never see the shippers' uploads — verify against
  # the bucket layout before changing anything.
  shared_store_key_prefix: compactor-index/
  compaction_interval: 10m
  apply_retention_interval: 60m
  retention_enabled: false
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
  retention_table_timeout: 0s
  delete_batch_size: 70
  delete_request_cancel_period: 24h0m0s
  delete_max_interval: 0s
  max_compaction_parallelism: 1
  upload_parallelism: 10
  compactor_ring:
    instance_id: "${HOSTNAME}"
    instance_interface_names:
      - eth1
    instance_addr: ${LOKI_BIND_ADDR:-0.0.0.0}
    instance_port: ${GRPC_LISTEN_PORT:-9096}
##################
# Limits config
# Per-tenant ingestion, label, stream and query limits.
# Nesting restored: every key below is a child of `limits_config`.
# NOTE(review): several limits are set to 0 (max_line_size,
# max_streams_per_user, max_global_streams_per_user, max_queriers_per_tenant,
# max_query_lookback) — presumably meaning "no limit"; confirm per key.
limits_config:
  # Ingestion
  ingestion_rate_strategy: local
  ingestion_rate_mb: 100
  ingestion_burst_size_mb: 150
  max_label_name_length: 512
  max_label_value_length: 2048
  max_label_names_per_series: 24
  reject_old_samples: true
  reject_old_samples_max_age: 1w
  creation_grace_period: 30m
  enforce_metric_name: true
  max_line_size: 0
  max_line_size_truncate: false
  increment_duplicate_timestamp: false
  max_streams_per_user: 0
  max_global_streams_per_user: 0
  unordered_writes: true
  per_stream_rate_limit: 50MB
  per_stream_rate_limit_burst: 75MB
  # Querying
  max_chunks_per_query: 2000000
  max_query_series: 5000
  max_query_lookback: 0s
  max_query_length: 100d
  max_query_parallelism: 32
  tsdb_max_query_parallelism: 512
  cardinality_limit: 100000
  max_streams_matchers_per_query: 1000
  max_concurrent_tail_requests: 10
  max_entries_limit_per_query: 5000
  max_cache_freshness_per_query: 1m
  max_queriers_per_tenant: 0
  query_timeout: 180s
  split_queries_by_interval: 30m
  min_sharding_lookback: 0s
  # NOTE(review): compactor.retention_enabled is false in this file, so this
  # period is presumably not enforced by the compactor — confirm.
  retention_period: 370d
##################
# Frontend worker config
# Querier-side workers that pull work from the scheduler on the reader
# service (same address the frontend section points at).
# Nesting restored: every key below is a child of `frontend_worker`.
frontend_worker:
  scheduler_address: ${LOKI_READER_DNS:-loki-reader.cluster.internal}:${GRPC_LISTEN_PORT:-9096}
  dns_lookup_duration: 10s
  parallelism: 20
  match_max_concurrent: true
  id: "${HOSTNAME}"
##################
# Other config
# Tracing and usage reporting are both switched off.
# Nesting restored: each flag is a child of the section named above it.
tracing:
  enabled: false
analytics:
  reporting_enabled: false
##################
# Memberlist settings
# Gossip cluster used as the KV store by every ring in this file. Nodes
# discover each other through DNS lookups of the reader and writer services
# on the cluster port (default 7946).
# Nesting restored: all keys are children of `memberlist`; the two `dns+…`
# entries are items of `join_members` and the single address is the item of
# `bind_addr`.
memberlist:
  node_name: "${HOSTNAME}"
  randomize_node_name: true
  retransmit_factor: 3
  pull_push_interval: 10s
  gossip_interval: 200ms
  gossip_to_dead_nodes_time: 10s
  dead_node_reclaim_time: 15s
  compression_enabled: true
  join_members:
    - dns+${LOKI_READER_DNS:-loki-reader.cluster.internal}:${LOKI_CLUSTER_LISTEN_PORT:-7946}
    - dns+${LOKI_WRITER_DNS:-loki-writer.cluster.internal}:${LOKI_CLUSTER_LISTEN_PORT:-7946}
  min_join_backoff: 1s
  max_join_backoff: 1m
  max_join_retries: 10
  abort_if_cluster_join_fails: true
  left_ingesters_timeout: 30s
  leave_timeout: 10s
  bind_addr:
    - ${LOKI_BIND_ADDR:-0.0.0.0}
  bind_port: ${LOKI_CLUSTER_LISTEN_PORT:-7946}
##################
# Common config
# Shared defaults: the S3 bucket/region, the replication factor, a common
# ring definition and the compactor endpoints on the reader service.
# Nesting restored from the flattened paste. `instance_interface_names` and
# `instance_addr` each appeared twice at the same level (duplicate keys):
# the first pair, next to `kvstore` and `instance_port`, belongs to `ring`;
# the second pair belongs to `common` itself.
common:
  storage:
    s3:
      bucketnames: ${S3_BUCKET_NAME:-loki-store}
      region: ${S3_BUCKET_REGION:-eu-central-1}
  persist_tokens: false
  replication_factor: 3
  ring:
    kvstore:
      store: memberlist
    heartbeat_period: 5s
    heartbeat_timeout: 30s
    instance_interface_names:
      - eth1
    instance_addr: ${LOKI_BIND_ADDR:-0.0.0.0}
    instance_port: ${GRPC_LISTEN_PORT:-9096}
  instance_interface_names:
    - eth1
  instance_addr: ${LOKI_BIND_ADDR:-0.0.0.0}
  compactor_address: http://${LOKI_READER_DNS:-loki-reader.cluster.internal}:${HTTP_LISTEN_PORT:-3100}
  compactor_grpc_address: ${LOKI_READER_DNS:-loki-reader.cluster.internal}:${GRPC_LISTEN_PORT:-9096}
##################