Hi all,
We are running Loki in monolithic mode in our Kubernetes cluster. A Grafana Alloy DaemonSet ships logs from every node in the cluster.
One of our components writes 15k log lines per second at peak, and somehow there are huge gaps (sometimes more than 30s) in these logs. Looking at the log files on the node itself, there are no gaps, so the component writes all of its logs correctly, but they don't all arrive at Loki and are therefore missing when we query for them in Grafana.
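In case someone wants to point us at the right counters: queries along these lines should surface client- or server-side drops (assuming the standard metric names exposed by Alloy's loki.write component and by Loki, which may differ between versions):

  # Alloy side: entries dropped by loki.write after retries are exhausted
  sum(rate(loki_write_dropped_entries_total[5m]))

  # Loki side: samples rejected at ingestion, grouped by reason
  # (rate limits, line length, timestamp validation, ...)
  sum by (reason) (rate(loki_discarded_samples_total[5m]))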
We have already tried a lot of different configurations; the current one looks like this:
Loki config:
config.yaml: |
  analytics:
    reporting_enabled: false
  auth_enabled: false
  common:
    compactor_address: 'http://loki:3100'
    path_prefix: /var/loki
    replication_factor: 1
    storage:
      filesystem:
        chunks_directory: /var/loki/chunks
        rules_directory: /var/loki/rules
  compactor:
    compaction_interval: 5m
    delete_request_store: filesystem
    retention_delete_delay: 1h
    retention_enabled: true
    working_directory: /var/loki/compactor
  frontend:
    scheduler_address: ""
    tail_proxy_url: "http://loki-querier.{{ .Release.Namespace }}.svc.cluster.local:3100"
  frontend_worker:
    scheduler_address: ""
  index_gateway:
    mode: ring
  ingester:
    lifecycler:
      heartbeat_timeout: 10m
    chunk_target_size: 1572864
    chunk_idle_period: 5m
    max_chunk_age: 4h
    flush_check_period: 12s
    wal:
      enabled: true
      dir: /var/loki/wal
    flush_op_backoff:
      min_period: 500ms
      max_period: 10s
  ingester_client:
    grpc_client_config:
      max_recv_msg_size: 209715200
      max_send_msg_size: 104857600
    remote_timeout: 60s
  limits_config:
    ingestion_burst_size_mb: 256
    ingestion_rate_mb: 256
    per_stream_rate_limit: 25MB
    per_stream_rate_limit_burst: 50MB
    max_query_lookback: {{ .Values.retentionPeriod }}
    max_query_series: 10000
    max_entries_limit_per_query: 20000
    query_timeout: 300s
    retention_period: {{ .Values.retentionPeriod }}
    max_label_value_length: 40960
    max_line_size: 512KB
    max_line_size_truncate: true
    max_structured_metadata_size: 256KB
    max_streams_per_user: 100000
    max_global_streams_per_user: 200000
  memberlist:
    join_members:
      - loki
  pattern_ingester:
    enabled: false
  ruler:
    storage:
      type: local
  schema_config:
    configs:
      - from: "2024-01-01"
        index:
          period: 24h
          prefix: loki_index_
        object_store: filesystem
        schema: v13
        store: tsdb
  server:
    grpc_listen_port: 9995
    http_listen_port: 3100
    http_server_read_timeout: 600s
    http_server_write_timeout: 600s
    grpc_server_max_recv_msg_size: 209715200
    grpc_server_max_send_msg_size: 104857600
  storage_config:
    filesystem:
      directory: /var/loki/chunks
    hedging:
      at: 250ms
      max_per_second: 20
      up_to: 3
    tsdb_shipper:
      active_index_directory: /var/loki/tsdb-shipper-active
      index_gateway_client:
        server_address: ""
  tracing:
    enabled: false
containers:
  - name: loki
    resources:
      requests:
        cpu: 2
        memory: 1Gi
      limits:
        cpu: 4
        memory: 4Gi
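Given the chunk and flush settings above, ingester flush behavior can be cross-checked with something like the following (again assuming standard Loki ingester metric names, which may vary by version):

  # Why chunks are flushed (reasons such as idle, full, max_age)
  sum by (reason) (rate(loki_ingester_chunks_flushed_total[5m]))

  # A persistently growing flush queue would point at flush pressure
  loki_ingester_flush_queue_length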
Alloy configuration for logs:
loki.write "lokilogs_integrations" {
  endpoint {
    url            = "http://loki.{{ .Release.Namespace }}.svc.cluster.local:3100/loki/api/v1/push"
    batch_wait     = "1s"
    batch_size     = "8MiB"
    remote_timeout = "60s"
  }
  external_labels = {
    cluster = "{{ .Values.global.clusterurl }}",
    job     = "integrations/logging",
  }
}
loki.process "logs_integrations_integrations_kubernetes_pod_logs" {
  forward_to = [loki.write.lokilogs_integrations.receiver]

  stage.docker {}

  stage.match {
    selector = "{app=~\".*mfc.*\"}"

    stage.replace {
      expression = "(.+\\s[F,P]\\s)"
      replace    = ""
    }

    stage.multiline {
      firstline     = "^(\\[[^\\]]*\\] ){4}"
      max_lines     = 8000
      max_wait_time = "30s"
    }

    stage.regex {
      expression = "^\\[(?P<time>[^\\]]+)\\] \\[(?P<level>[^ \\]]+) *\\] \\[(?P<threadId>[^ \\]]+) *\\] \\[(?P<logger>[^\\]]+)\\] ( ?\\[(?P<threadExecutionContext>[^\\]]+)\\])? ?(?P<message>.+)"
    }

    stage.timestamp {
      source = "time"
      format = "RFC3339Nano"
    }

    stage.structured_metadata {
      values = {
        level                  = "",
        logger                 = "",
        message                = "",
        threadExecutionContext = "",
        threadId               = "",
      }
    }
  }
}
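For context, after the prefix-stripping replace stage the application log lines this pipeline parses look roughly like this (fabricated example, shaped to match the firstline and regex stages above):

  [2024-05-07T10:15:30.123456789Z] [INFO ] [worker-3 ] [com.example.mfc.Service] [ctx-42] processed order 1234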
discovery.relabel "logs_integrations_integrations_kubernetes_pod_logs" {
  targets = discovery.kubernetes.k8s_pods.targets

  rule {
    source_labels = ["__address__", "__meta_kubernetes_pod_container_port_number"]
    regex         = "(.+)"
    action        = "replace"
    target_label  = "__address__"
    replacement   = "$1:$2"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_controller_name"]
    regex         = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?"
    target_label  = "__tmp_controller_name"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"]
    regex         = "^;*([^;]+)(;.*)?$"
    target_label  = "app"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"]
    regex         = "^;*([^;]+)(;.*)?$"
    target_label  = "instance"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"]
    regex         = "^;*([^;]+)(;.*)?$"
    target_label  = "component"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_node_name"]
    target_label  = "node_name"
  }
  rule {
    source_labels = ["__meta_kubernetes_namespace"]
    target_label  = "namespace"
  }
  rule {
    source_labels = ["namespace", "app"]
    separator     = "/"
    target_label  = "service"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_name"]
    target_label  = "pod"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_container_name"]
    target_label  = "container"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"]
    separator     = "/"
    target_label  = "__path__"
    replacement   = "/var/log/pods/*$1/*.log"
  }
  rule {
    source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"]
    separator     = "/"
    regex         = "true/(.*)"
    target_label  = "__path__"
    replacement   = "/var/log/pods/*$1/*.log"
  }
}
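For completeness: the discovery.kubernetes.k8s_pods component that the targets expression above refers to didn't make it into the paste; a minimal version of it would look like this (sketch, the real component may also set selectors or namespace filters):

  // Minimal pod discovery feeding the relabel rules above (sketch).
  discovery.kubernetes "k8s_pods" {
    role = "pod"
  }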
Hopefully some of you have an idea what could cause these log gaps.
Kind regards, Christian