Grafana/Loki slow to load more than 2k logs

Hi, I am running Loki using Simple Scalable mode using docker compose. I have 3 read and 3 writes running and using nginx inside docker compose to distribute the traffic.

My question is when we set the limit value to 1k logs the queries are fast, but as soon as we increase the value to more than 1k the queries take a very long time and sometimes times out.

I have tried modifying the below configs but none of them seemed to improve the performance:

  • chunk_encoding: snappy
  • max_concurrent: 8
  • tsdb_max_query_parallelism: 1000
  • split_queries_by_interval: 1hr

Is there anything we can do to improve the read performance of Grafana/Loki logs? Any help will be appreciated. Thank you.

Grafana OSS Version:
{
“commit”: “252761264e22ece57204b327f9130d3b44592c01”,
“database”: “ok”,
“version”: “10.3.3”
}

Below are the loki configs

---
auth_enabled: false

server:
  http_listen_address: 0.0.0.0
  http_listen_port: 3100

memberlist:
  join_members: ["read", "write", "backend"]
  dead_node_reclaim_time: 30s
  gossip_to_dead_nodes_time: 15s
  left_ingesters_timeout: 30s
  bind_addr: ['0.0.0.0']
  bind_port: 7946
  gossip_interval: 2s

schema_config:
  configs:
    - from: 2021-08-01
      store: tsdb
      object_store: s3
      schema: v13
      index:
        prefix: index_
        period: 24h

limits_config:
  retention_period: 360h
  reject_old_samples: false
  reject_old_samples_max_age: 168h
  split_queries_by_interval: 15m

common:
  path_prefix: /loki
  replication_factor: 1
  compactor_address: http://backend:3100
  storage:
    s3:
      endpoint: minio:9000
      insecure: true
      bucketnames: loki-data
      access_key_id: loki
      secret_access_key: supersecret

Here is the docker compose file

---
version: "3"

networks:
  loki:

services:
  read:
    image: grafana/loki:3.0.0
    command: "-config.file=/etc/loki/config.yaml -target=read"
    ports:
      - 3100
      - 7946
      - 9095
    volumes:
      - ./loki-config.yaml:/etc/loki/config.yaml
    depends_on:
      - minio
      interval: 10s
      timeout: 5s
      retries: 5
    networks: &loki-dns
      loki:
        aliases:
          - loki

  write:
    image: grafana/loki:3.0.0
    command: "-config.file=/etc/loki/config.yaml -target=write"
    ports:
      - 3100
      - 7946
      - 9095
    volumes:
      - ./loki-config.yaml:/etc/loki/config.yaml
      interval: 10s
      timeout: 5s
      retries: 5
    depends_on:
      - minio
    networks:
      <<: *loki-dns

  minio:
    image: minio/minio
    entrypoint:
      - sh
      - -euc
      - |
        mkdir -p /data/loki-data && \
        mkdir -p /data/loki-ruler && \
        minio server /data
    environment:
      - MINIO_ROOT_USER=loki
      - MINIO_ROOT_PASSWORD=supersecret
      - MINIO_PROMETHEUS_AUTH_TYPE=public
      - MINIO_UPDATE=off
    ports:
      - 9000
    volumes:
      - 'minio_data:/data'
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ]
      interval: 15s
      timeout: 20s
      retries: 5
    networks:
      - loki

  backend:
    image: grafana/loki:3.0.0
    volumes:
      - ./loki-config.yaml:/etc/loki/config.yaml
    ports:
      - "3100"
      - "7946"
    command: "-config.file=/etc/loki/config.yaml -target=backend -legacy-read-mode=false"
    depends_on:
      - gateway
    networks:
      - loki
    

  gateway:
    image: nginx:latest
    depends_on:
      - read
      - write
    entrypoint:
      - sh
      - -euc
      - |
        cat <<EOF > /etc/nginx/nginx.conf
        user  nginx;
        worker_processes  5;  ## Default: 1

        events {
          worker_connections   1000;
        }

        http {
          resolver 127.0.0.11;

          server {
            listen             3100;

            location = / {
              return 200 'OK';
              auth_basic off;
            }

            location = /api/prom/push {
              proxy_pass       http://write:3100\$$request_uri;
            }

            location = /api/prom/tail {
              proxy_pass       http://read:3100\$$request_uri;
              proxy_set_header Upgrade \$$http_upgrade;
              proxy_set_header Connection "upgrade";
            }

            location ~ /api/prom/.* {
              proxy_pass       http://read:3100\$$request_uri;
            }

            location = /loki/api/v1/push {
              proxy_pass       http://write:3100\$$request_uri;
            }

            location = /loki/api/v1/tail {
              proxy_pass       http://read:3100\$$request_uri;
              proxy_set_header Upgrade \$$http_upgrade;
              proxy_set_header Connection "upgrade";
            }

            location ~ /loki/api/.* {
              proxy_pass       http://read:3100\$$request_uri;
            }
          }
        }
        EOF
        /docker-entrypoint.sh nginx -g "daemon off;"
    ports:
      - "3100:3100"
    healthcheck:
      test: ["CMD", "service", "nginx", "status"]
      interval: 10s
      timeout: 5s
      retries: 5
    networks:
      - loki

volumes:
  minio_data:
    driver: local
  1. Check your querier and see if there is any error log there.

  2. Try increasing some of your grpc message size, such as grpc_server_max_recv_msg_size and grpc_server_max_send_msg_size.