Hi team,
We have a heavily loaded distributed Loki deployment, installed with the Helm chart on an OpenShift cluster.
We see a lot of 500 errors, without any further detail, like this one in our Loki distributor pods:
level=warn caller=logging.go:142 msg="POST /loki/api/v1/push (500) 136.58547ms"
Also we see the error in our loki-gateway nginx logs like this:
admin [30/Sep/2025:09:58:45 +0000] 500 "POST /loki/api/v1/push HTTP/1.1" 104 "-" "Vector/0.46.1 (x86_64-unknown-linux-gnu 9a19e8a 2025-04-14 18:36:30.707862743)" "-"
We don't notice any trouble with the ingester pods; we have 3 of them running.
This seems to be causing some logs to go missing.
We use Vector as the log source for Loki and an AWS S3 bucket as the storage destination.
We tried raising the log level to debug, but we still don't see any details about the error.
Can you please help us figure out the exact cause of the problem?
Our Loki config:
---
# Helm values for the Grafana Loki chart (distributed deployment).
# NOTE(review): the pasted text had lost all indentation and had stanza
# boundaries fused onto single lines; the nesting below is reconstructed from
# the key names — confirm against the original values file.
deploymentMode: Distributed

# Gateway (nginx) image and DNS resolver used inside the OpenShift cluster.
gateway:
  image:
    registry: quay.io
    repository: nginx/nginx-unprivileged
    tag: 1.24-alpine
  nginxConfig:
    resolver: "dns-default.openshift-dns.svc.cluster.local."

loki:
  auth_enabled: true
  commonConfig:
    path_prefix: /var/loki
  storage:
    type: s3
  schemaConfig:
    configs:
      # Quoted so the date stays a string instead of being parsed as a date.
      - from: "2024-04-01"
        store: tsdb
        object_store: s3
        schema: v13
        index:
          prefix: loki_index_
          period: 24h
  compactor:
    working_directory: /var/loki/compactor
  # NOTE(review): the two security contexts are presumed to sit under `loki`
  # (the paste does not show their level) — confirm.
  podSecurityContext:
    runAsNonRoot: false
    allowPrivilegeEscalation: false
  containerSecurityContext:
    runAsNonRoot: false
    allowPrivilegeEscalation: false
    capabilities:
      drop:
        - ALL
    readOnlyRootFilesystem: true

test:
  enabled: false

sidecar:
  rules:
    enabled: false
  datasources:
    enabled: false

singleBinary:
  replicas: 0
# Targets with zero replicas.
backend:
  replicas: 0
read:
  replicas: 0
write:
  replicas: 0

# Bloom components, also at zero replicas.
bloomPlanner:
  replicas: 0
bloomBuilder:
  replicas: 0
bloomGateway:
  replicas: 0

lokiCanary:
  enabled: false

ruler:
  enabled: false

---
# NOTE(review): top-level `gateway` and `loki` are defined a second time
# further down, which is invalid (duplicate keys) inside a single YAML
# document. This looks like a second values file pasted after the first, so a
# document separator is placed here — confirm where the real file boundary is.
global:
  extraArgs:
    - "-log.level=debug"
# Per-component sizing and Loki runtime settings.
# NOTE(review): indentation was lost in the paste; the nesting below is
# reconstructed from the key names — confirm against the original values file.
gateway:
  service:
    type: LoadBalancer
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      memory: 256Mi

loki:
  commonConfig:
    replication_factor: 3
  storage:
    type: s3
    s3:
      region: us-east-1
  storage_config:
    aws:
      region: us-east-1
      s3forcepathstyle: false
  limits_config:
    retention_period: 744h  # 31 days retention
    ingestion_rate_mb: 100
    ingestion_burst_size_mb: 300
    ingestion_rate_strategy: "local"
    max_streams_per_user: 0
    max_line_size: 2097152
    per_stream_rate_limit: 100M
    per_stream_rate_limit_burst: 400M
    reject_old_samples: false
    reject_old_samples_max_age: 168h
    # NOTE(review): the value was blank in the paste (parses as null); an
    # explicit empty list is presumed to be the intent — confirm.
    discover_service_name: []
    discover_log_levels: false
    volume_enabled: true
    max_global_streams_per_user: 75000
    max_entries_limit_per_query: 100000
    increment_duplicate_timestamp: true
    allow_structured_metadata: true
  runtimeConfig:
    configs:
      log_stream_creation: true
      log_push_request: true
      log_push_request_streams: true
      log_duplicate_stream_info: true
  ingester:
    chunk_target_size: 8388608  # 8MB
    chunk_idle_period: 5m
    max_chunk_age: 2h
    chunk_encoding: snappy  # Compress data (reduces S3 transfer size)
    chunk_retain_period: 1h  # Keep chunks in memory after flush
    flush_op_timeout: 10m  # Add timeout for S3 operations
  querier:
    max_concurrent: 8
  query_range:
    parallelise_shardable_queries: true

ingester:
  replicas: 3
  autoscaling:
    enabled: true
  zoneAwareReplication:
    enabled: true
  maxUnavailable: 1
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      cpu: 2000m
      memory: 2Gi
  persistence:
    enabled: true
    size: 10Gi
  affinity: {}
  podAntiAffinity:
    soft: {}
    hard: {}

querier:
  replicas: 3
  autoscaling:
    enabled: true
  maxUnavailable: 1
  resources:
    requests:
      cpu: 300m
      memory: 512Mi
    limits:
      memory: 1Gi
  affinity: {}

queryFrontend:
  replicas: 2
  maxUnavailable: 1
  resources:
    requests:
      cpu: 200m
      memory: 256Mi
    limits:
      memory: 512Mi

queryScheduler:
  replicas: 2
  maxUnavailable: 1
  resources:
    requests:
      cpu: 200m
      memory: 256Mi
    limits:
      memory: 512Mi

distributor:
  replicas: 5
  autoscaling:
    enabled: true
    minReplicas: 5
    maxReplicas: 10
    targetCPUUtilizationPercentage: 70
  maxUnavailable: 1
  resources:
    requests:
      cpu: 500m
      memory: 1Gi
    limits:
      memory: 2Gi
  affinity: {}

compactor:
  replicas: 1
  # NOTE(review): the three retention_* keys look like Loki compactor config
  # options rather than chart-level component values; presumably they belong
  # under `loki.compactor` to take effect — verify against the chart's values
  # reference before relying on retention.
  retention_enabled: true
  retention_delete_delay: 2h
  retention_delete_worker_count: 150
  resources:
    requests:
      cpu: 200m
      memory: 512Mi
    limits:
      memory: 1Gi

indexGateway:
  replicas: 2
  maxUnavailable: 0
  resources:
    requests:
      cpu: 300m
      memory: 512Mi
    limits:
      memory: 1Gi
  affinity: {}

chunksCache:
  enabled: true
  replicas: 1

resultsCache:
  enabled: true
  replicas: 1

memcached:
  enabled: true

memcachedResults:
  enabled: true

memcachedChunks:
  enabled: true

memcachedFrontend:
  enabled: true

memcachedIndexQueries:
  enabled: true

memcachedIndexWrites:
  enabled: true

minio:
  enabled: false

memcachedExporter:
  resources:
    requests:
      cpu: 50m
      memory: 64Mi
    limits:
      memory: 128Mi