# Loki configuration
# Loki release and the hostname used for authenticated access.
loki_version: "2.9.4"
loki_auth_url: loki.it.ufl.edu

# TLS certificate chain and private key, read from HashiCorp Vault at
# template time via the hashi_vault lookup.
loki_cert: "{{ lookup('hashi_vault', 'secret/data/services/certs/irs/lb.it.ufl.edu:fullchain') }}"
loki_key: "{{ lookup('hashi_vault', 'secret/data/services/certs/irs/lb.it.ufl.edu:key') }}"

# Listener ports; referenced by loki_config.server below.
loki_http_port: 8443
loki_grpc_port: 9443

# Environment for the Loki systemd unit. The folded scalar joins the lines
# below into one space-separated string.
# GOMAXPROCS follows the host's vCPU count, falling back to the processor
# count when ansible_processor_vcpus is undefined.
loki_systemd_environment: >-
  GOMAXPROCS={{ ansible_processor_vcpus | default(ansible_processor_count) }}
  GOGC=20

# Service account and filesystem layout.
loki_system_user: loki
loki_system_group: loki
loki_config_dir: /etc/loki
loki_storage_dir: /loki

loki_auth_enabled: true
# Per-tenant ingestion throughput in MB; feeds the ingestion and per-stream
# rate limits in loki_config.limits_config.
_max_tenant_throughput_mb: 40
# Recommended burst value is 1.5x the max throughput value
_max_tenant_throughput_burst_mb: 60
# Seconds. The server HTTP read/write timeouts are derived from this value
# plus a 10 second margin.
_max_query_timeout: 600
# Loki configuration tree, structured after the Loki 2.9 configuration
# reference. Presumably rendered to Loki's config file by the consuming
# role -- confirm against the role's template.
loki_config:
  common:
    replication_factor: 3
    ring:
      kvstore:
        store: memberlist
      heartbeat_timeout: 10m
    storage:
      s3:
        bucketnames: loki
        endpoint: object-prod.ceph.apps.it.ufl.edu
        region: default
        # Build the Vault path by concatenation; "{{ }}" must not be nested
        # inside another Jinja expression.
        access_key_id: "{{ lookup('hashi_vault', vault.ceph_ansible_secrets_path ~ '/' ~ deployment.environment ~ '/object_users/loki:access_key') }}"
        secret_access_key: "{{ lookup('hashi_vault', vault.ceph_ansible_secrets_path ~ '/' ~ deployment.environment ~ '/object_users/loki:secret_key') }}"
        insecure: false
        s3forcepathstyle: true
        http_config:
          # NOTE(review): disables TLS certificate verification towards the
          # object store -- confirm this is intentional.
          insecure_skip_verify: true

  server:
    # NOTE(review): debug logging is verbose; confirm it is wanted here.
    log_level: debug
    http_listen_port: "{{ loki_http_port }}"
    http_tls_config:
      cert_file: "{{ loki_config_dir }}/ssl/cert.crt"
      key_file: "{{ loki_config_dir }}/ssl/cert.key"
    # Allow 10s of headroom beyond the longest permitted query.
    http_server_read_timeout: "{{ _max_query_timeout + 10 }}s"
    http_server_write_timeout: "{{ _max_query_timeout + 10 }}s"
    grpc_listen_port: "{{ loki_grpc_port }}"
    # 104857600 bytes = 100 MiB
    grpc_server_max_recv_msg_size: 104857600
    grpc_server_max_send_msg_size: 104857600
    grpc_server_max_concurrent_streams: 1500

  ingester:
    chunk_idle_period: 1h
    max_chunk_age: 2h
    flush_check_period: 10s
    wal:
      # Cap WAL replay memory at 75% of the host's total RAM.
      replay_memory_ceiling: "{{ (ansible_memtotal_mb * 0.75) | int }}MB"

  querier:
    max_concurrent: 6000
    multi_tenant_queries_enabled: true

  memberlist:
    abort_if_cluster_join_fails: false
    bind_port: 7946
    # Every host in the "loki" inventory group is a gossip seed.
    join_members: "{{ groups['loki'] }}"
    max_join_backoff: 1m
    max_join_retries: 10
    min_join_backoff: 1s
    # auto attempt to rejoin cluster if disconnected, helps prevent split brain
    rejoin_interval: 1m

  schema_config:
    configs:
      # Quoted so the date stays a string rather than a YAML timestamp.
      - from: "2023-03-15"
        store: tsdb
        object_store: s3
        schema: v12
        index:
          prefix: index_tsdb_
          period: 24h

  storage_config:
    hedging:
      at: "250ms"
      max_per_second: 20
      up_to: 3
    tsdb_shipper:
      active_index_directory: "{{ loki_storage_dir }}/tsdb-shipper-active"
      cache_location: "{{ loki_storage_dir }}/tsdb-shipper-cache"
      shared_store: s3

  frontend:
    log_queries_longer_than: 15s
    compress_responses: true

  frontend_worker:
    frontend_address: "frontend.loki.it.ufl.edu:9443"
    grpc_client_config:
      # 100 MiB, written as a plain integer to match the server gRPC limits.
      max_send_msg_size: 104857600
    parallelism: 6

  query_range:
    align_queries_with_step: true
    max_retries: 5
    cache_results: true
    results_cache:
      cache:
        embedded_cache:
          enabled: true
          max_size_mb: 2048
          ttl: 1h

  query_scheduler:
    max_outstanding_requests_per_tenant: 42768

  limits_config:
    enforce_metric_name: false
    # Throughput for a tenant/user/org-id per node
    ingestion_rate_mb: "{{ _max_tenant_throughput_mb }}"
    ingestion_burst_size_mb: "{{ _max_tenant_throughput_burst_mb }}"
    per_stream_rate_limit: "{{ _max_tenant_throughput_mb }}MB"
    per_stream_rate_limit_burst: "{{ _max_tenant_throughput_burst_mb }}MB"
    max_entries_limit_per_query: 100000
    max_global_streams_per_user: 20000
    retention_period: 2w
    # NOTE(review): 3m is shorter than _max_query_timeout (600s), which
    # drives the server HTTP timeouts -- confirm which limit is intended.
    query_timeout: 3m
    max_cache_freshness_per_query: "10m"
    # parallelize queries in 15min intervals
    split_queries_by_interval: 15m
    # limit how far back we will accept logs
    reject_old_samples: true
    # Increase maximum from default 500 to prevent the error
    # "maximum of series (500) reached for a single query"
    max_query_series: 10000
    max_query_parallelism: 6

  compactor:
    working_directory: "{{ loki_storage_dir }}/compactor"
    shared_store: s3
    compaction_interval: 1m
    retention_enabled: true