I’m currently testing the bloom components to speed up the queries.
Bloom planner, builder and gateway are deployed and seem to work fine.
Queriers are returning the logs and I do not see any issues in the querier logs.
If I change tsdb_sharding_strategy to bounded, which is the recommended setting for accelerating queries with blooms (Bloom filters (Experimental) | Grafana Loki documentation), I start to see the following error in the logs:
{"caller":"parallel_chunk_fetch.go:71","err":"failed to load chunk 'stg_tenant/b971048a8e8bf86a/193cee9993c:193cee9e63a:0': failed to get s3 object: NoSuchKey: The specified key does not exist.\n\tstatus code: 404, request id: , host id: ","level":"error","msg":"error fetching chunks","ts":"2024-12-16T11:00:59.26124264Z"}
Also, after changing the TSDB sharding strategy to bounded, the queriers return only about 30 minutes of logs (even if I request 24h, for example). If I remove tsdb_sharding_strategy = bounded from the configuration (to use the default, power of two), the error logs disappear and the queriers return the logs without problems.
Full loki querier configuration:
# Authentication is switched on for this instance; presumably every request
# must then carry a tenant ID (the chunk key in the reported error starts with
# `stg_tenant/`) — confirm against the Loki docs.
auth_enabled: true
# Bloom gateway settings as seen by this querier instance.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
bloom_gateway:
  client:
    # Address uses the dnssrv+ discovery prefix.
    addresses: dnssrv+_loki-blooms._tcp.service.consul-general.dummydomain.com
    grpc_client_config:
      tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
      tls_cert_path: /etc/loki/ssl/loki.crt
      tls_enabled: true
      tls_key_path: /etc/loki/ssl/loki.key
  # NOTE(review): disabled here while limits_config sets
  # bloom_gateway_enable_filtering: true — confirm this is intended for
  # this target.
  enabled: false
  worker_concurrency: 8
# Chunk cache: memcached-backed, with a background write-back queue.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
chunk_store_config:
  chunk_cache_config:
    background:
      writeback_buffer: 500000
      writeback_goroutines: 1
      writeback_size_limit: 500MB
    default_validity: 168h
    memcached:
      batch_size: 500
      expiration: 168h
      parallelism: 100
    memcached_client:
      addresses: dns+memcached-chunks.service.consul-general.dummydomain.com:11211
      consistent_hash: true
      max_idle_conns: 72
      timeout: 60s
# Settings shared across components: compactor address, ring KV store and the
# S3 object store.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
common:
  compactor_grpc_address: loki-backends.service.consul-general.dummydomain.com:9095
  replication_factor: 2
  ring:
    # Ring state lives in Consul at localhost:8500 under the
    # loki-general/collectors/ prefix.
    kvstore:
      consul:
        host: localhost:8500
      prefix: loki-general/collectors/
      store: consul
  storage:
    s3:
      access_key_id: dummyaccesskey
      bucketnames: loki
      endpoint: 192.168.0.1
      http_config:
        # Certificate verification is skipped for the S3 endpoint.
        insecure_skip_verify: true
      insecure: false
      s3forcepathstyle: true
      secret_access_key: dummysecretkey
# Compactor: ring membership, retention and delete-request handling.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
compactor:
  compactor_ring:
    heartbeat_period: 15s
    heartbeat_timeout: 1m
    instance_availability_zone: az1
    kvstore:
      consul:
        host: localhost:8500
      prefix: loki-general/collectors/
      store: consul
    zone_awareness_enabled: true
  delete_request_store: s3
  max_compaction_parallelism: 8
  retention_delete_delay: 5m
  retention_enabled: true
  working_directory: /srv/loki/compactor
# gRPC client used to talk to the compactor: snappy compression plus TLS with
# the same certificate paths used by the other clients in this file.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
compactor_grpc_client:
  grpc_compression: snappy
  tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
  tls_cert_path: /etc/loki/ssl/loki.crt
  tls_enabled: true
  tls_key_path: /etc/loki/ssl/loki.key
# Distributor ring, stored in Consul under the loki-general/collectors/ prefix.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
distributor:
  ring:
    kvstore:
      consul:
        host: localhost:8500
      prefix: loki-general/collectors/
      store: consul
# Query frontend: gRPC client settings, scheduler address and TLS for tailing.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
frontend:
  grpc_client_config:
    grpc_compression: snappy
    tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
    tls_cert_path: /etc/loki/ssl/loki.crt
    tls_enabled: true
    tls_key_path: /etc/loki/ssl/loki.key
  log_queries_longer_than: 60s
  scheduler_address: loki-backends.service.consul-general.dummydomain.com:9095
  scheduler_worker_concurrency: 5
  tail_tls_config:
    tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
    tls_cert_path: /etc/loki/ssl/loki.crt
    tls_key_path: /etc/loki/ssl/loki.key
# Querier worker: connects to the scheduler at scheduler_address over TLS gRPC.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
frontend_worker:
  grpc_client_config:
    grpc_compression: snappy
    tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
    tls_cert_path: /etc/loki/ssl/loki.crt
    tls_enabled: true
    tls_key_path: /etc/loki/ssl/loki.key
  id: loki-queriers-001
  scheduler_address: loki-backends.service.consul-general.dummydomain.com:9095
# Index gateway ring: zone-aware, replication factor 2, state in Consul.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
index_gateway:
  ring:
    heartbeat_period: 5s
    heartbeat_timeout: 1m
    instance_availability_zone: az1
    kvstore:
      consul:
        host: localhost:8500
      prefix: loki-general/collectors/
      store: consul
    replication_factor: 2
    zone_awareness_enabled: true
# Ingester: lifecycler/ring membership and write-ahead log location.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
ingester:
  lifecycler:
    availability_zone: az1
    heartbeat_period: 5s
    heartbeat_timeout: 1m
    ring:
      heartbeat_timeout: 1m
      kvstore:
        consul:
          host: localhost:8500
        prefix: loki-general/collectors/
        store: consul
      replication_factor: 2
      zone_awareness_enabled: true
  wal:
    dir: /srv/loki/wal
    enabled: true
# Client used to reach ingesters: TLS gRPC with snappy compression and a 30s
# remote timeout.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
ingester_client:
  grpc_client_config:
    grpc_compression: snappy
    tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
    tls_cert_path: /etc/loki/ssl/loki.crt
    tls_enabled: true
    tls_key_path: /etc/loki/ssl/loki.key
  remote_timeout: 30s
# Per-tenant limits, including the bloom and TSDB sharding switches that the
# report above is about.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
limits_config:
  allow_deletes: true
  allow_structured_metadata: true
  bloom_block_encoding: snappy
  bloom_creation_enabled: true
  bloom_gateway_enable_filtering: true
  bloom_split_series_keyspace_by: 1024
  cardinality_limit: 10000000
  deletion_mode: filter-and-delete
  ingestion_burst_size_mb: 1024
  ingestion_rate_mb: 1024
  max_cache_freshness_per_query: 10m
  max_chunks_per_query: 50000
  max_querier_bytes_read: 0
  max_query_parallelism: 256
  max_query_series: 99999999
  otlp_config:
    resource_attributes:
      attributes_config:
        # The listed resource attributes get the index_label action.
        - action: index_label
          attributes:
            - stack
            - provider
            - jurisdiction
            - region
            - service
            - role
      ignore_defaults: true
  query_timeout: 8m
  reject_old_samples: false
  reject_old_samples_max_age: 2w
  retention_period: 30d
  shard_streams:
    enabled: true
  split_instant_metric_queries_by_interval: 1h
  split_metadata_queries_by_interval: 1d
  split_queries_by_interval: 15m
  tsdb_max_query_parallelism: 1024
  tsdb_precompute_chunks: true
  # This is the setting the report toggles: with `bounded`, queriers log
  # NoSuchKey chunk-fetch errors and return only ~30 minutes of logs; removing
  # it makes both symptoms disappear.
  # NOTE(review): the failing chunk key in the report ends in `:0`; if that
  # trailing field is the chunk checksum, a zero would suggest the chunk refs
  # are being built without it — TODO confirm against upstream issues.
  tsdb_sharding_strategy: bounded
  unordered_writes: true
  volume_enabled: true
  volume_max_series: 100000000
# Pattern ingester: enabled, with a TLS gRPC client and a zone-aware ring at
# replication factor 1.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
pattern_ingester:
  client_config:
    grpc_client_config:
      grpc_compression: snappy
      tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
      tls_cert_path: /etc/loki/ssl/loki.crt
      tls_enabled: true
      tls_key_path: /etc/loki/ssl/loki.key
  enabled: true
  lifecycler:
    availability_zone: az1
    ring:
      replication_factor: 1
      zone_awareness_enabled: true
# Querier: concurrency cap and multi-tenant query support.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
querier:
  max_concurrent: 16
  multi_tenant_queries_enabled: true
# Query-range middleware: every result cache type is enabled and all six share
# the same memcached endpoint and snappy compression.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
query_range:
  align_queries_with_step: false
  cache_index_stats_results: true
  cache_instant_metric_results: true
  cache_label_results: true
  cache_results: true
  cache_series_results: true
  cache_volume_results: true
  index_stats_results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
  instant_metric_query_split_align: true
  instant_metric_results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
  label_results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
  max_retries: 15
  results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
  series_results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
  volume_results_cache:
    cache:
      default_validity: 0s
      memcached:
        batch_size: 4
        expiration: 168h
        parallelism: 5
      memcached_client:
        addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
    compression: snappy
# Query scheduler: zone-aware ring kept in Consul; use_scheduler_ring is on.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
query_scheduler:
  scheduler_ring:
    heartbeat_period: 15s
    heartbeat_timeout: 1m
    instance_availability_zone: az1
    kvstore:
      consul:
        host: localhost:8500
      prefix: loki-general/collectors/
      store: consul
    zone_awareness_enabled: true
  use_scheduler_ring: true
# Ruler: Alertmanager discovery, sharding, a remote-write client named
# `mimir`, and its own ring.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
ruler:
  alertmanager_url: dns+_alert-manager._tcp.dummydomain.com
  enable_alertmanager_discovery: true
  enable_alertmanager_v2: true
  enable_api: true
  enable_sharding: true
  remote_write:
    clients:
      mimir:
        headers:
          X-Scope-OrgID: dummy_xcope
        tls_config:
          # Certificate verification is skipped for the remote-write target.
          insecure_skip_verify: true
        url: https://dummy-remote.dummydomain.com:8080/api/v1/push
  ring:
    heartbeat_period: 5s
    heartbeat_timeout: 1m
    kvstore:
      consul:
        host: localhost:8500
      # Unlike the other rings in this file (loki-general/collectors/), the
      # ruler ring uses its own prefix.
      prefix: loki-general/rulers/
      store: consul
  rule_path: /srv/loki/rules
  wal:
    dir: /srv/loki/ruler-wal
# Runtime overrides file and its period setting (10s).
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
runtime_config:
  file: /etc/loki/loki_runtime.yml
  period: 10s
# Single schema period: TSDB index (v13) on S3 with 24h index tables.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
schema_config:
  configs:
    # Quoted so YAML parsers keep the date as a string instead of typing it
    # as a timestamp.
    - from: "2024-07-10"
      index:
        period: 24h
        prefix: index_
      object_store: s3
      schema: v13
      store: tsdb
# HTTP (3100) and gRPC (9095) listeners, both with TLS, plus logging settings.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
server:
  graceful_shutdown_timeout: 30s
  grpc_listen_port: 9095
  grpc_server_keepalive_time: 2h
  grpc_server_keepalive_timeout: 20s
  grpc_server_max_concurrent_streams: 0
  # 104857600 bytes = 100 MiB in each direction.
  grpc_server_max_recv_msg_size: 104857600
  grpc_server_max_send_msg_size: 104857600
  grpc_server_min_time_between_pings: 10s
  grpc_tls_config:
    cert_file: /etc/loki/ssl/loki.crt
    client_auth_type: RequestClientCert
    client_ca_file: /etc/loki/ssl/loki_ca_bundle.crt
    key_file: /etc/loki/ssl/loki.key
  http_listen_port: 3100
  http_server_idle_timeout: 5m
  http_server_read_timeout: 2m
  http_server_write_timeout: 2m
  http_tls_config:
    cert_file: /etc/loki/ssl/loki.crt
    client_auth_type: RequestClientCert
    client_ca_file: /etc/loki/ssl/loki_ca_bundle.crt
    key_file: /etc/loki/ssl/loki.key
  log_format: json
  log_level: debug
  log_source_ips_enabled: true
# Storage: bloom shipper cache, index query cache, chunk fetch limits and the
# TSDB shipper with its index-gateway client.
# NOTE(review): nesting reconstructed from a paste that lost its indentation —
# verify against the deployed file.
storage_config:
  bloom_shipper:
    blocks_cache:
      hard_limit: 64GB
      soft_limit: 32GB
      ttl: 24h
    download_parallelism: 16
    max_query_page_size: 512MiB
    working_directory: /srv/loki/bloom
  disable_broad_index_queries: true
  index_queries_cache_config:
    default_validity: 0s
    memcached:
      batch_size: 4
      expiration: 168h
      parallelism: 5
    memcached_client:
      addresses: dnssrv+_memcached-logging._tcp.service.consul-general.dummydomain.com
      timeout: 2s
  max_chunk_batch_size: 150
  max_parallel_get_chunk: 1500
  tsdb_shipper:
    active_index_directory: /srv/loki/index
    cache_location: /srv/loki/index_cache
    cache_ttl: 168h
    index_gateway_client:
      grpc_client_config:
        grpc_compression: snappy
        tls_ca_path: /etc/loki/ssl/loki_ca_bundle.crt
        tls_cert_path: /etc/loki/ssl/loki.crt
        tls_enabled: true
        tls_key_path: /etc/loki/ssl/loki.key
      server_address: dns+_loki-backends._tcp.service.consul-general.dummydomain.com:9095
    query_ready_num_days: 7
# Presumably selects which Loki target this process runs; the report above
# describes this file as the querier configuration.
target: read