Hello! I'm getting the error “SERVER_ERROR out of memory storing object”:
loki-ingester-zone-c-0 ingester level=warn ts=2024-11-22T22:00:36.111916583Z caller=background.go:200 msg="backgroundCache writeBackLoop Cache.Store fail" err="server=10.240.30.133:11211: memcache: unexpected response line from \"set\": \"SERVER_ERROR out of memory storing object\\r\\n\""
loki-ingester-zone-a-2 ingester level=warn ts=2024-11-23T01:13:35.39775891Z caller=background.go:200 msg="backgroundCache writeBackLoop Cache.Store fail" err="server=10.240.30.133:11211: memcache: unexpected response line from \"set\": \"SERVER_ERROR out of memory storing object\\r\\n\""
loki-querier-5c89b58f7b-7x99z querier level=warn ts=2024-11-22T10:37:27.035726202Z caller=background.go:200 msg="backgroundCache writeBackLoop Cache.Store fail" err="server=10.240.30.133:11211: memcache: unexpected response line from \"set\": \"SERVER_ERROR out of memory storing object\\r\\n\""
How can I troubleshoot this error?
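The failing server is the memcached instance at 10.240.30.133:11211, so it looks like the cache itself is running out of memory rather than the Loki pods. This is roughly how I plan to check how full it is — a sketch only; the namespace and Service name are guesses for my setup, and nc must be available locally:

# Assumption: the chunk cache is exposed by a Service named loki-chunks-cache in the loki namespace.
kubectl -n loki port-forward svc/loki-chunks-cache 11211:11211 &

# Query memcached over its plain-text protocol and look at how close it is
# to its memory limit and whether it is evicting items.
printf 'stats\r\nquit\r\n' | nc -w 2 127.0.0.1 11211 \
  | grep -E 'STAT (bytes|limit_maxbytes|curr_items|evictions) '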
My Loki Helm chart version: 6.10.0
My values.yaml:
deploymentMode: Distributed
global:
image:
registry: "harbor.mos.corp/dockerhub"
.globalExtra: &globalExtra
extraArgs:
- "-config.expand-env=true"
extraEnv:
- name: S3_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: loki-s3
key: S3_ACCESS_KEY_ID
- name: S3_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: loki-s3
key: S3_SECRET_ACCESS_KEY
tolerations:
- key: "role"
operator: "Equal"
value: "loki"
effect: "NoSchedule"
nodeSelector:
role: loki
gateway:
enabled: false
ingress:
enabled: true
ingressClassName: nginx
annotations:
cert-manager.io/cluster-issuer: "cluster-issuer"
hosts:
- loki-core-v2.mos.corp
serviceMonitor:
enabled: false
compactor:
enabled: true
replicas: 1
<<: *globalExtra
persistence:
enabled: true
claims:
- name: data
size: 50Gi
storageClass: yc-network-ssd
resources:
requests:
memory: "2Gi"
cpu: "800m"
limits:
memory: "2Gi"
distributor:
<<: *globalExtra
replicas: 1
autoscaling:
enabled: true
minReplicas: 3
maxReplicas: 7
targetCPUUtilizationPercentage: 70
targetMemoryUtilizationPercentage: 70
resources:
requests:
memory: "260Mi"
cpu: "300m"
limits:
memory: "260Mi"
maxUnavailable: 1
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution: []
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 99
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: distributor
topologyKey: kubernetes.io/hostname
- weight: 100
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: distributor
topologyKey: topology.kubernetes.io/zone
ingester:
<<: *globalExtra
replicas: 8
persistence:
enabled: true
claims:
- name: data
size: 50Gi
storageClass: yc-network-ssd
autoscaling:
enabled: false
resources:
requests:
memory: "5Gi"
cpu: "600m"
limits:
memory: "10Gi"
maxUnavailable: 1
zoneAwareReplication:
zoneA:
nodeSelector:
topology.kubernetes.io/zone: ru-central1-a
role: loki
zoneB:
nodeSelector:
topology.kubernetes.io/zone: ru-central1-b
role: loki
zoneC:
nodeSelector:
topology.kubernetes.io/zone: ru-central1-d
role: loki
indexGateway:
<<: *globalExtra
replicas: 3
enabled: true
persistence:
enabled: true
storageClass: yc-network-ssd
resources:
requests:
memory: "2Gi"
cpu: "500m"
limits:
memory: "4Gi"
maxUnavailable: 1
querier:
<<: *globalExtra
autoscaling:
enabled: true
minReplicas: 7
maxReplicas: 15
targetCPUUtilizationPercentage: 70
targetMemoryUtilizationPercentage: 70
resources:
requests:
memory: "3Gi"
cpu: "2500m"
limits:
memory: "5Gi"
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution: []
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 99
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: querier
topologyKey: kubernetes.io/hostname
- weight: 100
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: querier
topologyKey: topology.kubernetes.io/zone
maxUnavailable: 1
queryFrontend:
<<: *globalExtra
autoscaling:
enabled: true
minReplicas: 2
maxReplicas: 4
targetCPUUtilizationPercentage: 60
targetMemoryUtilizationPercentage: 60
resources:
requests:
memory: "1024Mi"
cpu: "100m"
limits:
memory: "1024Mi"
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution: []
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 99
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: query-frontend
topologyKey: kubernetes.io/hostname
- weight: 100
podAffinityTerm:
labelSelector:
matchLabels:
app.kubernetes.io/component: query-frontend
topologyKey: topology.kubernetes.io/zone
maxUnavailable: 1
queryScheduler:
<<: *globalExtra
replicas: 1
loki:
auth_enabled: false
storage:
type: s3
bucketNames:
chunks: core-loki
ruler: core-loki
admin: core-loki
s3:
endpoint: https://storage.yandexcloud.net:443/
region: ru-central1
bucketnames: core-loki
secretAccessKey: "${S3_SECRET_ACCESS_KEY}"
accessKeyId: "${S3_ACCESS_KEY_ID}"
tsdb_shipper:
shared_store: s3
active_index_directory: /var/loki/tsdb-index
cache_location: /var/loki/tsdb-cache
schemaConfig:
configs:
- from: "2020-01-01"
store: tsdb
object_store: s3
schema: v12
index:
prefix: tsdb_index_
period: 24h
distributor:
ring:
kvstore:
store: memberlist
ingester:
lifecycler:
ring:
kvstore:
store: memberlist
replication_factor: 1
autoforget_unhealthy: true
chunk_idle_period: 1h
chunk_target_size: 1572864
max_chunk_age: 1h
chunk_encoding: snappy
server:
grpc_server_max_recv_msg_size: 4194304
grpc_server_max_send_msg_size: 4194304
limits_config:
allow_structured_metadata: false
reject_old_samples: true
reject_old_samples_max_age: 168h
max_cache_freshness_per_query: 10m
split_queries_by_interval: 15m
retention_period: 30d
max_global_streams_per_user: 0
ingestion_rate_mb: 30
query_timeout: 300s
volume_enabled: true
memcached:
chunk_cache:
enabled: true
results_cache:
enabled: true
ruler:
enabled: true
replicas: 1
<<: *globalExtra
storage:
type: local
local:
directory: /etc/loki/rules
persistence:
enabled: true
size: 10Gi
ring:
kvstore:
store: memberlist
directories:
fake:
rules.txt: |
groups:
- name: logs2metrics
interval: 1m
rules:
- record: panic_count
expr: |
sum by (app,cluster) (
count_over_time({app=~".+",level!~"INFO|info|debug"}|="panic"[5m])
)
- record: logs_size_by_app
expr: |
sum by (app,cluster)(
bytes_over_time({app=~".+"}[1m])
)
bloomCompactor:
replicas: 0
bloomGateway:
replicas: 0
backend:
replicas: 0
read:
replicas: 0
write:
replicas: 0
singleBinary:
replicas: 0
test:
enabled: false
lokiCanary:
enabled: false
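Since the error comes from memcached itself, would raising the cache memory (or the per-item size limit) be the right direction? Something like the following is what I had in mind — a sketch only; the chunksCache/resultsCache keys and numbers are my assumption based on the chart 6.x default values, so they may not match my chart version exactly:

chunksCache:
  enabled: true
  # Assumption: memory (MB) handed to memcached via -m; raise this if the chunk cache is evicting.
  allocatedMemory: 8192
  # Assumption: max item size (MB) passed via -I, since Loki chunks can exceed memcached's 1 MB default.
  maxItemMemory: 5
resultsCache:
  enabled: true
  allocatedMemory: 1024

If that is the right knob, I assume the cache pods' memory requests/limits also have to stay above allocatedMemory so Kubernetes doesn't OOM-kill them. Is that the recommended way to handle this error, or is there something else I should check first?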