Empty ring when attempting to deploy standalone loki rule component

Hello,
I have loki on global cluster, it works fine.
We have a need to evaluate rules on different dev/prod clusters (the rules are auto generated on these clusters depending on what is deployed there).
I expected I could deploy loki rule on these clusters and set global loki as target to query it.
I am using loki-distributed helm chart.
Config looks like:

          fullnameOverride: "loki"
          serviceMonitor:
            enabled: true
            labels:
              prometheus: main

          prometheusRule:
            enabled: false

          compactor:
            enabled: false

          ingester:
            replicas: 0

          distributor:
            replicas: 0

          gateway:
            replicas: 0

          querier:
            replicas: 0

          queryFrontend:
            replicas: 1
            resources:
              requests:
                cpu: 50m
                memory: 128Mi
              limits:
                cpu: "1"
                memory: 1G
            extraEnv:
              - name: GOOGLE_APPLICATION_CREDENTIALS
                value: /etc/secrets/key.json
            extraVolumeMounts:
              - name: loki-gcs-secret
                mountPath: /etc/secrets
            extraVolumes:
              - name: loki-gcs-secret
                secret:
                  secretName: loki-mr-secret
            extraArgs:
              - --log.format=json
              - --log.level=info

          tableManager:
            enabled: false

          ruler:
            replicas: 1
            enabled: true
            resources:
              requests:
                cpu: "1.5"
                memory: 3Gi
              limits:
                cpu: "7"
                memory: 6Gi
            persistence:
              enabled: true
              size: 10Gi
            extraEnv:
              - name: GOOGLE_APPLICATION_CREDENTIALS
                value: /etc/secrets/key.json
            extraVolumes:
              - name: loki-operator-rules
                configMap:
                  name: loki-operator-rules
                  defaultMode: 420
              - name: rules-tmp
                emptyDir: {}
              - name: loki-mr-secret
                secret:
                  secretName: loki-mr-secret
            extraVolumeMounts:
              - name: loki-operator-rules
                mountPath: /var/loki/rules/cit1-k8s
              - name: rules-tmp
                mountPath: /tmp/loki/rules
              - name: loki-mr-secret
                mountPath: /etc/secrets
            extraArgs:
              - --log.format=json
              - --log.level=info

          indexGateway:
            enabled: false

          memcachedExporter:
            enabled: false

          memcachedChunks:
            enabled: false

          memcachedFrontend:
            enabled: false

          memcachedIndexQueries:
            enabled: false

          memcachedIndexWrites:
            enabled: false

          networkPolicy:
            enabled: false

          loki:
            schemaConfig:
              configs:
                - from: 2021-06-05
                  store: boltdb-shipper
                  object_store: gcs
                  schema: v11
                  index:
                    prefix: loki_index_
                    period: 24h

            storageConfig:
              gcs:
                bucket_name: something-loki-multiregional

              boltdb_shipper:
                active_index_directory: /var/loki/index
                shared_store: gcs
                cache_location: /var/loki/cache

            structuredConfig:
              auth_enabled: true

              common:
                replication_factor: 1

              frontend:
                max_outstanding_per_tenant: 4096
                downstream_url: https://loki-gateway.int.something.live

              ruler:
                storage:
                  local:
                    directory: /var/loki/rules
                rule_path: /tmp/loki/rules
                enable_api: true
                alertmanager_url: http://kube-prometheus-stack-alertmanager.monitoring.svc.cluster.local:9093
                enable_alertmanager_v2: true
                wal:
                  dir: /var/loki/wal
                remote_write:
                  enabled: false
                evaluation:
                  mode: remote
                  query_frontend: 
                    address: loki-query-frontend-grpc.global.int.something.live:443

Logs are:

2024-03-20 20:10:36.517	
{
  "caller": "compat.go:78",
  "err": "empty ring",
  "level": "error",
  "msg": "rule evaluation failed",
  "query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
  "query_hash": 483524370,
  "rule_name": "Service Error Detected",
  "rule_type": "alerting",
  "ts": "2024-03-20T18:10:36.498081589Z",
  "user": "cit1-k8s"
}
2024-03-20 20:10:36.517	
{
  "cache_chunk_bytes_fetched": 0,
  "cache_chunk_bytes_stored": 0,
  "cache_chunk_download_time": "0s",
  "cache_chunk_hit": 0,
  "cache_chunk_req": 0,
  "cache_index_download_time": "0s",
  "cache_index_hit": 0,
  "cache_index_req": 0,
  "cache_result_download_time": "0s",
  "cache_result_hit": 0,
  "cache_result_req": 0,
  "cache_stats_results_download_time": "0s",
  "cache_stats_results_hit": 0,
  "cache_stats_results_req": 0,
  "caller": "metrics.go:159",
  "component": "ruler",
  "duration": "478.345µs",
  "end_delta": "1.623118ms",
  "evaluation_mode": "local",
  "latency": "fast",
  "length": "0s",
  "level": "info",
  "limit": 0,
  "lines_per_second": 0,
  "org_id": "cit1-k8s",
  "post_filter_lines": 0,
  "query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
  "query_hash": 483524370,
  "query_type": "metric",
  "queue_time": "0s",
  "range_type": "instant",
  "returned_lines": 0,
  "shards": 0,
  "splits": 0,
  "start_delta": "1.622884ms",
  "status": "500",
  "step": "0s",
  "store_chunks_download_time": "0s",
  "throughput": "0B",
  "total_bytes": "0B",
  "total_bytes_structured_metadata": "0B",
  "total_entries": 0,
  "total_lines": 0,
  "traceID": "434eb7749cb8c56e",
  "ts": "2024-03-20T18:10:36.497961389Z"
}
2024-03-20 20:10:36.517	
{
  "caller": "engine.go:232",
  "component": "ruler",
  "evaluation_mode": "local",
  "level": "info",
  "msg": "executing query",
  "org_id": "cit1-k8s",
  "query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
  "query_hash": 483524370,
  "traceID": "434eb7749cb8c56e",
  "ts": "2024-03-20T18:10:36.497309039Z",
  "type": "instant"
}
2024-03-20 20:10:36.517	
{
  "caller": "compat.go:66",
  "level": "info",
  "msg": "evaluating rule",
  "query": "(sum by (cluster,namespace,message,level,thread,logger,exception,app)(count_over_time({level=\"ERROR\", marker=\"AppLog\", namespace=\"{{ $namespace }}\", app=\"{{ $app }}\"} | json | label_format exception=\"{{ trunc 100 .exception }}\" | label_format message=\"{{ trunc 100 .exception }}\"[1m])) > 0)",
  "query_hash": 483524370,
  "rule_name": "Service Error Detected",
  "rule_type": "alerting",
  "ts": "2024-03-20T18:10:36.49703202Z",
  "user": "cit1-k8s"
}
2024-03-20 20:10:35.716	
{
  "caller": "manager.go:995",
  "level": "info",
  "msg": "Starting rule manager...",
  "ts": "2024-03-20T18:10:35.686295555Z",
  "user": "cit1-k8s"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_client.go:595",
  "err": "1 error occurred:\n\t* Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host\n\n",
  "level": "warn",
  "msg": "joining memberlist cluster: failed to reach any nodes",
  "retries": 0,
  "ts": "2024-03-20T18:10:35.650144918Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_logger.go:74",
  "level": "warn",
  "msg": "Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host",
  "ts": "2024-03-20T18:10:35.650080664Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "loki.go:505",
  "level": "info",
  "msg": "Loki started",
  "ts": "2024-03-20T18:10:35.647333951Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "ruler.go:528",
  "level": "info",
  "msg": "ruler up and running",
  "ts": "2024-03-20T18:10:35.647260413Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "ruler",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.647129633Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "store",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.646987931Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "ingester-querier",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.646917882Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "ring.go:273",
  "level": "info",
  "msg": "ring doesn't exist in KV store yet",
  "ts": "2024-03-20T18:10:35.646746334Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_client.go:573",
  "join_members": "loki-memberlist",
  "level": "info",
  "msg": "joining memberlist cluster",
  "ts": "2024-03-20T18:10:35.646674014Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_client.go:560",
  "elapsed_time": "19.942534ms",
  "joined_nodes": 0,
  "level": "warn",
  "msg": "memberlist fast-join finished",
  "ts": "2024-03-20T18:10:35.646641319Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_logger.go:74",
  "level": "warn",
  "msg": "Failed to resolve loki-memberlist: lookup loki-memberlist on 10.2.72.10:53: no such host",
  "ts": "2024-03-20T18:10:35.646580392Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "ring",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.635205214Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "memberlist-kv",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.635054292Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "analytics",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.634904523Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "server",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.63481962Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "module_service.go:82",
  "level": "info",
  "module": "runtime-config",
  "msg": "initialising",
  "ts": "2024-03-20T18:10:35.63478843Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "mapper.go:47",
  "level": "info",
  "msg": "cleaning up mapped rules directory",
  "path": "/tmp/loki/rules",
  "ts": "2024-03-20T18:10:35.629398402Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "shipper_index_client.go:76",
  "index-store": "boltdb-shipper-2021-06-05",
  "level": "info",
  "msg": "starting boltdb shipper in RO mode",
  "ts": "2024-03-20T18:10:35.628835924Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "shipper.go:165",
  "index-store": "boltdb-shipper-2021-06-05",
  "level": "info",
  "msg": "starting index shipper in RO mode",
  "ts": "2024-03-20T18:10:35.628787761Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "table_manager.go:271",
  "distinct_users": "",
  "distinct_users_len": 0,
  "duration": "4.298µs",
  "index-store": "boltdb-shipper-2021-06-05",
  "level": "info",
  "msg": "query readiness setup completed",
  "ts": "2024-03-20T18:10:35.628700534Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_client.go:540",
  "level": "info",
  "msg": "memberlist fast-join starting",
  "nodes_found": 1,
  "to_join": 4,
  "ts": "2024-03-20T18:10:35.626702642Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "experimental.go:20",
  "feature": "In-memory (FIFO) cache - chunksembedded-cache",
  "level": "warn",
  "msg": "experimental feature in use",
  "ts": "2024-03-20T18:10:35.626084665Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "cache.go:127",
  "level": "warn",
  "msg": "fifocache config is deprecated. use embedded-cache instead",
  "ts": "2024-03-20T18:10:35.626023544Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "memberlist_client.go:434",
  "cluster_label": "",
  "level": "info",
  "msg": "Using memberlist cluster label and node name",
  "node": "loki-ruler-5dbf96698-b7w9c-1ae6ca0e",
  "ts": "2024-03-20T18:10:35.625559326Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "server.go:322",
  "grpc": "[::]:9095",
  "http": "[::]:3100",
  "level": "info",
  "msg": "server listening on addresses",
  "ts": "2024-03-20T18:10:35.624227Z"
}
2024-03-20 20:10:35.716	
{
  "caller": "main.go:108",
  "level": "info",
  "msg": "Starting Loki",
  "ts": "2024-03-20T18:10:35.620615784Z",
  "version": "(version=2.9.2, branch=HEAD, revision=a17308db6)"
}
2024-03-20 20:10:35.716	
{
  "caller": "loki.go:288",
  "level": "warn",
  "msg": "global timeout not configured, using default engine timeout (\"5m0s\"). This behavior will change in the next major to always use the default global timeout (\"5m\").",
  "ts": "2024-03-20T18:10:35.617459845Z"
}

I have deployed the query-frontend component just for tests. The idea is not to use it.
Could you point me why I am getting ring error (I am not expecting any ring with just one component) and what I am doing wrong in general?
(using latest loki)

When I am using query-frontend within same deployment:

address: loki-query-frontend.logging.svc:9095

all looks fine

It’s not clear what you are trying to do, but a couple of things:

  1. Ruler is part of Loki cluster, you cannot use Ruler to query another cluster. Actually you may be able to if you don’t have auth enabled, but why would you want to do that?

  2. If you have more than one backend you need to configure a ring for ruler.

  3. It is recommended to use query frontend for ruler, if you don’t then queries are executed from ruler directly and there is no query splitting or any sort of distribution.

hello tonyswumac,
i want to get my older logs on my grafana loki a year old or more, i’m unable to get those old logs and im only getting the current day logs ,
please help me to resolve this problem and even if logs are more than 200mb those are also not showing me on my loki.
@tonyswumac

@tonyswumac I will try to elaborate on what I am trying to do.
We have multiple clusters with it’s own production/non production services. On each cluster we have promtail which is sending logs to the global cluster where loki-distributed is deployed. On global cluster loki-opeator (from opsygy) is deployed. I see there is loki-operator from grafana, but I did not find sufficient documentation how to deploy it with helm.
With this setup, the only possible way to deploy loki alert-rules is to create a GlobalLokiRule on the global cluster. Which means, if we are deploying a service on the cit cluster (as an example), which has a built-in helm config with alert rules, we have to somehow deliver these rules to the global cluster to make them be evaluated with loki rule.
My idea is to deploy some loki components on cit cluster, which will be able to query main loki on global cluster therefore, evaluate alert rules locally without need to somehow transfer alert rules configmaps to global cluster.

I see. So you have multiple application clusters, all sending logs to a centralized Loki cluster, and you’d like rules to be part of application clusters rather than part of Loki cluster deployment, does that sound correct?

I’ve never had to do this, so I could be wrong, but I am quite sure you can’t make ruler work outside of Loki cluster. Looking at Ruler configuration all connectivity are facilitated via gRPC, which should be internal to the Loki cluster. If someone has managed to use ruler outside of Loki cluster I’d like to know how it’s done.

That said, I think you can still use Ruler externally in a different way.

Ruler has its own API. To enable API you’ll just need to enable S3 backend for Ruler. And then you can use the API (or cortextool) to create rules externally. See Alerting and recording rules | Grafana Loki documentation. With that, you’ll just need to make sure that you dedicate a namespace for each of your application cluster so they don’t run into each other. For example you might do:

Namespace | Rule groups
cis1 | app1
cis1 | app2
cis2 | app3
cis2 | app4

And then you’ll just need to figure out a way to bake the API call into your deployment pipeline.

Please create a separate thread with more information.

1 Like

Thank you, I will look into cortex/api.

Currently, I am testing the setup with:

  • query-frontend
  • ruler
    On my downstream cluster.

I am able to query loki cluster with downstream query-frontend by setting downstream_url in query-frontend configuration. The ruler is set to use remote evaluation. So it looks like:
Downstream Ruler → Downstream Query-Frontend → Loki Cluster Gateway

Not sure about the performance yet. But looks like it works.

Just want to point out, if your ruler in downstream cluster is able to hit query frontend on gRPC port, then that ruler is effectively part of the Loki cluster whether or not it’s deployed as such. There are definitely considerations to be made in terms of network connectivity and such, but if it’s not a concern then it’s certainly a solution.

hello @tonyswumac
i have installed the grafana loki on my server and on the other servers using the promtail to retrieve the logs on the grafana dashboard.
problem im getting is that im only able to get 24 hour logs on dashboard,
and only limited mb of data logs can be seen on it .

auth_enabled: false

server:
http_listen_port: 3100
grpc_listen_port: 9096
grpc_server_max_recv_msg_size: 4000000000
grpc_server_max_send_msg_size: 4000000000

common:
path_prefix: /etc/loki
storage:
filesystem:
chunks_directory: /etc/loki/chunks
rules_directory: /etc/loki/rules
replication_factor: 1
ring:
instance_addr: 127.0.0.1
kvstore:
store: inmemory

query_range:

results_cache:

cache:

embedded_cache:

enabled: true

max_size_mb: 40000

split_queries_by_interval: 0

parallelise_shardable_queries: false

querier:
max_concurrent: 2048

frontend:
max_outstanding_per_tenant: 4096
compress_responses: true

max_outstanding_per_tenant: 8192 # Adjust the maximum number of outstanding requests per tenant.

max_entry_bytes: 4194304 # Adjust the maximum size of log entries in bytes (4MB in this example).

max_stream_bytes: 104857600

schema_config:
configs:
- from: 2020-10-24
store: boltdb-shipper
object_store: filesystem
schema: v11
index:
prefix: index_
period: 24h

ruler:
alertmanager_url: http://:9093

above is my loki-local-config
kindly please help me how can i get the 1 year old logs and get the 200mb above data on my dashboard. Also please help me to filter out the logs more.

@tonyswumac please look into it and help me to solve this issue

Can you start a new thread, please? Let’s not hijack someone else’s thread.