We wanted to migrate the Loki Distributed stack from one cluster to another on 2023-08-19, reusing the same GCS bucket.
To do this, we first switched off the original Loki, which had the following config:
# Loki configuration of the original cluster (switched off before the move).
# The paste had lost all indentation; nesting is restored here following the
# Loki configuration reference. Values are unchanged except where noted.
# NOTE(review): no schema_config section appears in this paste, so the schema
# version and index/chunk periods the old cluster wrote with cannot be seen
# here — confirm them before comparing with the new cluster's schema_config.
auth_enabled: false

chunk_store_config:
  max_look_back_period: 0s

compactor:
  compaction_interval: 2h
  retention_delete_delay: 2h
  retention_enabled: true
  shared_store: gcs

distributor:
  ring:
    kvstore:
      store: memberlist

frontend:
  compress_responses: true
  log_queries_longer_than: 5s
  max_outstanding_per_tenant: 4096
  tail_proxy_url: http://loki-distributed-querier:3100

frontend_worker:
  frontend_address: loki-distributed-query-frontend:9095
  parallelism: 6

ingester:
  chunk_block_size: 262144
  chunk_encoding: snappy
  chunk_idle_period: 5m
  chunk_retain_period: 0s
  chunk_target_size: 1572864
  concurrent_flushes: 32
  flush_check_period: 15s
  lifecycler:
    ring:
      heartbeat_timeout: 10m
      kvstore:
        store: memberlist
      replication_factor: 3
  max_chunk_age: 2h
  max_transfer_retries: 0
  wal:
    dir: /var/loki/wal
    enabled: true

limits_config:
  creation_grace_period: 5m
  enforce_metric_name: true
  ingestion_burst_size_mb: 64
  ingestion_rate_mb: 32
  ingestion_rate_strategy: global
  max_cache_freshness_per_query: 1m
  max_entries_limit_per_query: 5000
  max_global_streams_per_user: 5000
  max_query_parallelism: 32
  max_streams_per_user: 0
  per_stream_rate_limit: 8MB
  per_stream_rate_limit_burst: 12MB
  reject_old_samples: true
  reject_old_samples_max_age: 365d
  retention_period: 168h
  # NOTE(review): pasted as `<>`, which is not a list; assumed to be a
  # mangled empty list — TODO confirm against the deployed config.
  retention_stream: []
  split_queries_by_interval: 15m
  unordered_writes: true

memberlist:
  join_members:
    - loki-distributed-memberlist

query_range:
  align_queries_with_step: true
  cache_results: true
  max_retries: 5
  parallelise_shardable_queries: true
  results_cache:
    cache:
      enable_fifocache: true
      fifocache:
        max_size_items: 1024
        ttl: 24h

server:
  grpc_server_max_concurrent_streams: 100
  grpc_server_max_recv_msg_size: 16777216
  grpc_server_max_send_msg_size: 16777216
  http_listen_port: 3100
  register_instrumentation: true

storage_config:
  boltdb_shipper:
    active_index_directory: /var/loki/index
    cache_location: /var/loki/cache
    cache_ttl: 72h
    query_ready_num_days: 7
    resync_interval: 2h
    shared_store: gcs
  disable_broad_index_queries: false
  filesystem:
    directory: /var/loki/chunks
  gcs:
    bucket_name: r7-loki
    chunk_buffer_size: 0
    request_timeout: 0s

table_manager:
  retention_deletes_enabled: false
  retention_period: 0s
We then booted the new Loki in the new cluster with the following config:
# Loki configuration of the new cluster, pointing at the same GCS bucket
# (r7-loki) as the original cluster's config above.
# The paste had lost all indentation; nesting is restored here following the
# Loki configuration reference. Values are unchanged except where noted.
auth_enabled: false

chunk_store_config:
  chunk_cache_config:
    embedded_cache:
      enabled: true
      max_size_mb: 2048
      ttl: 4h
    memcached:
      batch_size: 100
      expiration: 72h
      parallelism: 100
    memcached_client:
      addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-chunks.loki-distributed.svc.cluster.local
      consistent_hash: true
      max_item_size: 0
      timeout: 200ms
  max_look_back_period: 0s

common:
  compactor_address: http://loki-distributed-compactor:3100

compactor:
  compaction_interval: 2h
  retention_delete_delay: 2h
  retention_enabled: true
  shared_store: gcs

distributor:
  ring:
    kvstore:
      store: memberlist

frontend:
  compress_responses: true
  log_queries_longer_than: 5s
  max_outstanding_per_tenant: 10000
  scheduler_address: loki-distributed-query-scheduler:9095
  tail_proxy_url: http://loki-distributed-querier:3100

frontend_worker:
  grpc_client_config:
    max_send_msg_size: 104857600
  match_max_concurrent: true
  parallelism: 10
  scheduler_address: loki-distributed-query-scheduler:9095

ingester:
  chunk_block_size: 262144
  chunk_encoding: snappy
  chunk_idle_period: 12h
  chunk_retain_period: 0s
  chunk_target_size: 1572864
  concurrent_flushes: 32
  flush_check_period: 15s
  lifecycler:
    heartbeat_period: 5s
    ring:
      heartbeat_timeout: 10m
      kvstore:
        store: memberlist
      replication_factor: 3
  max_chunk_age: 12h
  max_transfer_retries: 0
  wal:
    dir: /var/loki/wal
    enabled: true
    flush_on_shutdown: true
    replay_memory_ceiling: 8GB

limits_config:
  creation_grace_period: 5m
  enforce_metric_name: true
  ingestion_burst_size_mb: 64
  ingestion_rate_mb: 32
  ingestion_rate_strategy: local
  max_cache_freshness_per_query: 1m
  max_chunks_per_query: 4000000
  max_entries_limit_per_query: 5000
  max_global_streams_per_user: 0
  max_query_length: 0
  max_query_lookback: 600d
  max_query_parallelism: 64
  max_query_series: 5000
  max_streams_per_user: 0
  per_stream_rate_limit: 8MB
  per_stream_rate_limit_burst: 12MB
  reject_old_samples: true
  reject_old_samples_max_age: 365d
  retention_period: 168h
  # NOTE(review): pasted as `<>`, which is not a list; assumed to be a
  # mangled empty list — TODO confirm against the deployed config.
  retention_stream: []
  split_queries_by_interval: 15m
  tsdb_max_query_parallelism: 512
  unordered_writes: true

memberlist:
  join_members:
    - loki-distributed-memberlist

query_range:
  align_queries_with_step: true
  cache_results: true
  max_retries: 5
  parallelise_shardable_queries: true
  results_cache:
    cache:
      memcached_client:
        addresses: dnssrv+_memcached-client._tcp.loki-distributed-memcached-frontend.loki-distributed.svc.cluster.local
        consistent_hash: true
        max_idle_conns: 16
        timeout: 500ms
        update_interval: 1m

runtime_config:
  file: /var/loki-distributed-runtime/runtime.yaml

# Two schema periods: v11 from 2020-09-07, then v12 (with an explicit 24h
# chunks period) from 2023-08-19, the migration date given in the text.
# NOTE(review): the old cluster's config above shows no schema_config, so it
# cannot be verified from here that data written before 2023-08-19 was stored
# with the v11 / loki_index_ / 24h settings declared in the first entry —
# confirm against the schema_config the old cluster actually ran with.
schema_config:
  configs:
    - from: "2020-09-07"
      index:
        period: 24h
        prefix: loki_index_
      object_store: gcs
      schema: v11
      store: boltdb-shipper
    - chunks:
        period: 24h
      from: "2023-08-19"
      index:
        period: 24h
        prefix: loki_index_
      object_store: gcs
      schema: v12
      store: boltdb-shipper

server:
  grpc_server_max_concurrent_streams: 100
  grpc_server_max_recv_msg_size: 16777216
  grpc_server_max_send_msg_size: 16777216
  http_listen_port: 3100
  http_server_read_timeout: 300s
  http_server_write_timeout: 300s
  register_instrumentation: true

storage_config:
  boltdb_shipper:
    active_index_directory: /var/loki/index
    # Not present in the old cluster's boltdb_shipper section above.
    build_per_tenant_index: true
    cache_location: /var/loki/cache
    cache_ttl: 168h
    index_gateway_client:
      server_address: dns:///loki-distributed-index-gateway:9095
    query_ready_num_days: 7
    resync_interval: 2h
    shared_store: gcs
  disable_broad_index_queries: false
  filesystem:
    directory: /var/loki/chunks
  gcs:
    bucket_name: r7-loki
    chunk_buffer_size: 0
    request_timeout: 0s
  tsdb_shipper:
    active_index_directory: /var/loki/tsdb-index
    cache_location: /var/loki/tsdb-cache
    index_gateway_client:
      server_address: dns:///loki-distributed-index-gateway:9095
    shared_store: gcs

table_manager:
  retention_deletes_enabled: false
  retention_period: 0s
However, the new Loki now cannot find the stored data, and queries against chunks created by the old Loki produce the following log message:
Object does not exist.
We assumed that, with almost the same “main config” and the new schema entry, the logs written before the migration could still be queried successfully.
What is the reason for this? Did we forget a migration step?