Has anyone managed to get OnCall working locally/on premise?

Hello,

I have been using open source Grafana for a couple of years now, on premise on an Ubuntu VM, not in Docker. I've been trying to install OnCall using this link (hobby mode):

However I get this error:

(screenshot of the error)

Grafana and Prometheus are already running locally. Here is my 'docker ps' output and my 'docker-compose.yml'; I must be missing something:

CONTAINER ID   IMAGE            COMMAND                  CREATED       STATUS                 PORTS                                       NAMES
2438bcb7c144   grafana/oncall   "sh -c 'uwsgi --ini …"   6 days ago    Up 6 days              0.0.0.0:8080->8080/tcp, :::8080->8080/tcp   mydocker_engine_1
ed34f543c51c   grafana/oncall   "sh -c ./celery_with…"   6 days ago    Up 21 minutes                                                      mydocker_celery_1
2bd197d1c28c   redis:7.0.5      "docker-entrypoint.s…"   2 weeks ago   Up 2 weeks (healthy)   6379/tcp                                    mydocker_redis_1
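
As a quick sanity check that the engine container is actually answering on port 8080 (the /health/ path here is an assumption; any response other than connection refused at least confirms the port mapping works):

curl -si http://localhost:8080/health/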

docker-compose.yml file


x-environment: &oncall-environment
  DATABASE_TYPE: sqlite3
  BROKER_TYPE: redis
  BASE_URL: $DOMAIN
  SECRET_KEY: $SECRET_KEY
  FEATURE_PROMETHEUS_EXPORTER_ENABLED: ${FEATURE_PROMETHEUS_EXPORTER_ENABLED:-false}
  PROMETHEUS_EXPORTER_SECRET: ${PROMETHEUS_EXPORTER_SECRET:-}
  REDIS_URI: redis://redis:6379/0
  DJANGO_SETTINGS_MODULE: settings.hobby
  CELERY_WORKER_QUEUE: "default,critical,long,slack,telegram,webhook,retry,celery,grafana"
  CELERY_WORKER_CONCURRENCY: "1"
  CELERY_WORKER_MAX_TASKS_PER_CHILD: "100"
  CELERY_WORKER_SHUTDOWN_INTERVAL: "65m"
  CELERY_WORKER_BEAT_ENABLED: "True"
  GRAFANA_API_URL: https://grafana.com:3000

services:
  engine:
    image: grafana/oncall
    restart: always
    ports:
      - "8080:8080"
    command: sh -c "uwsgi --ini uwsgi.ini"
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      oncall_db_migration:
        condition: service_completed_successfully
      redis:
        condition: service_healthy

  celery:
    image: grafana/oncall
    restart: always
    command: sh -c "./celery_with_exporter.sh"
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      oncall_db_migration:
        condition: service_completed_successfully
      redis:
        condition: service_healthy

  oncall_db_migration:
    image: grafana/oncall
    command: python manage.py migrate --noinput
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      redis:
        condition: service_healthy

  redis:
    image: redis:7.0.5
    restart: always
    expose:
      - 6379
    volumes:
      - redis_data:/data
    deploy:
      resources:
        limits:
          memory: 500m
          cpus: "0.5"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      timeout: 5s
      interval: 5s
      retries: 10

volumes:
#  grafana_data:
#  prometheus_data:
  oncall_data:
  redis_data:

I’m wondering if I’m missing the Prometheus part; I really don’t know at this point. I’m so close, but I’m now lost as to how to fix this.

Here are some other screenshots:

Adding the localhost address:

Then, if I select ‘Open Grafana OnCall’, I quickly see this:

Then I get this:

If I choose ‘Open Grafana OnCall’, I get:

(screenshot)

Sure, I use it on-prem. No reason it won’t work. It’s tough to say what’s going on here without logs, but it looks like there’s an issue syncing users from Grafana → Grafana OnCall. Check that the Service Account is set up and the API key is working; this is under Grafana > Users and access > Service Accounts. OnCall periodically syncs users from the main Grafana instance into its own DB.
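
For a quick check that the Service Account token is valid, something like this against the Grafana API should return the org details (placeholder host and token here, swap in your own; a 401/403 would point at the token or its role):

curl -s -H "Authorization: Bearer <service-account-token>" \
  https://your-grafana-host:3000/api/org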

If I set it up from docker-compose, everything works well.
But if Grafana is configured with TLS, authentication fails at first on validation of the cert, and I think also on the user sync.

@g0nz0uk in the documentation the URL is http://engine:8080. Also, is your .env file in the same folder as your compose file?
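
One way to confirm the .env is actually being picked up is to render the compose file with variables substituted (a sketch; run from the directory holding docker-compose.yml and .env):

docker compose config | grep -E 'BASE_URL|GRAFANA_API_URL|SECRET_KEY'
# (use `docker-compose config` if you are on the older v1 binary)
# BASE_URL should show your DOMAIN value rather than an empty string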

Hello, this is what I see under Users and access. I’m desperate to get this working.

Could you let me know where the log files are, or which ones would be best to get for you, and I’ll post back?
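
For reference, with this compose setup the OnCall logs come straight from the containers (container names taken from the 'docker ps' output earlier in the thread):

docker logs --tail 200 mydocker_engine_1   # API / uwsgi logs
docker logs --tail 200 mydocker_celery_1   # background sync and task logs
# or, from the compose directory:
docker compose logs -f engine celery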

I think that is missing?

Here is my compose file

version: "3.9"

x-environment: &oncall-environment
  DATABASE_TYPE: sqlite3
  BROKER_TYPE: redis
  BASE_URL: $DOMAIN
  SECRET_KEY: $SECRET_KEY
  FEATURE_PROMETHEUS_EXPORTER_ENABLED: ${FEATURE_PROMETHEUS_EXPORTER_ENABLED:-false}
  PROMETHEUS_EXPORTER_SECRET: ${PROMETHEUS_EXPORTER_SECRET:-}
  REDIS_URI: redis://redis:6379/0
  DJANGO_SETTINGS_MODULE: settings.hobby
  CELERY_WORKER_QUEUE: "default,critical,long,slack,telegram,webhook,retry,celery,grafana"
  CELERY_WORKER_CONCURRENCY: "1"
  CELERY_WORKER_MAX_TASKS_PER_CHILD: "100"
  CELERY_WORKER_SHUTDOWN_INTERVAL: "65m"
  CELERY_WORKER_BEAT_ENABLED: "True"
  GRAFANA_API_URL: https://localhost:3000

services:
  engine:
    image: grafana/oncall
    restart: always
    ports:
      - "8080:8080"
    command: sh -c "uwsgi --ini uwsgi.ini"
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      oncall_db_migration:
        condition: service_completed_successfully
      redis:
        condition: service_healthy

  celery:
    image: grafana/oncall
    restart: always
    command: sh -c "./celery_with_exporter.sh"
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      oncall_db_migration:
        condition: service_completed_successfully
      redis:
        condition: service_healthy

  oncall_db_migration:
    image: grafana/oncall
    command: python manage.py migrate --noinput
    environment: *oncall-environment
    volumes:
      - oncall_data:/var/lib/oncall
    depends_on:
      redis:
        condition: service_healthy

  redis:
    image: redis:7.0.5
    restart: always
    expose:
      - 6379
    volumes:
      - redis_data:/data
    deploy:
      resources:
        limits:
          memory: 500m
          cpus: "0.5"
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      timeout: 5s
      interval: 5s
      retries: 10

volumes:
  grafana_data:
  prometheus_data:
  oncall_data:
  redis_data:

My config.env.save file

echo "DOMAIN=http://grafana.mydomain.com:8080
# Remove 'with_grafana' below if you want to use existing grafana
# Add 'with_prometheus' below to optionally enable a local prometheus for oncall metrics
COMPOSE_PROFILES=with_grafana,with_prometheus
COMPOSE_PROFILES=with_grafana
# to setup an auth token for prometheus exporter metrics:
# PROMETHEUS_EXPORTER_SECRET=my_random_prometheus_secret
# also, make sure to enable the /metrics endpoint:
# FEATURE_PROMETHEUS_EXPORTER_ENABLED=True
SECRET_KEY="LnFUzDkciSHUN4353453c6cAmrGGc" > .env

I also see this briefly:

403 errors in log file

logger=accesscontrol.evaluator t=2024-06-03T20:10:28.56227234+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:28.565884946+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/organization/ status=403 remote_addr=10.1.35.130 time_ms=44 duration=44.89981ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:28.597028081+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/teams/ status=403 remote_addr=10.1.35.130 time_ms=42 duration=42.479988ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:28.6128135+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/user/ status=403 remote_addr=10.1.35.130 time_ms=53 duration=53.197862ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=accesscontrol.evaluator t=2024-06-03T20:10:28.890455303+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=accesscontrol.evaluator t=2024-06-03T20:10:28.922211762+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:28.928911437+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/user/ status=403 remote_addr=10.1.35.130 time_ms=40 duration=40.891471ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:28.955257057+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/alert_receive_channels/integration_options/ status=403 remote_addr=10.1.35.130 time_ms=35 duration=35.059267ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=accesscontrol.evaluator t=2024-06-03T20:10:29.024281067+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:29.05562419+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/features/ status=401 remote_addr=10.1.35.130 time_ms=33 duration=33.27114ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=accesscontrol.evaluator t=2024-06-03T20:10:29.130581624+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=accesscontrol.evaluator t=2024-06-03T20:10:29.154575012+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:29.159604325+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/organization/ status=403 remote_addr=10.1.35.130 time_ms=31 duration=31.025914ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=context userId=11 orgId=1 uname=admin.testuser1 t=2024-06-03T20:10:29.188680669+01:00 level=info msg="Request Completed" method=GET path=/api/plugin-proxy/grafana-oncall-app/api/internal/v1/teams/ status=403 remote_addr=10.1.35.130 time_ms=36 duration=36.160813ms size=44 referer=https://hob.global.com:3000/a/grafana-oncall-app/alert-groups handler=/api/plugin-proxy/:pluginId/* status_source=server
logger=accesscontrol.evaluator t=2024-06-03T20:10:47.88940472+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=accesscontrol.evaluator t=2024-06-03T20:10:59.832446774+01:00 level=debug msg="Matched scope" userscope=plugins:* targetscope=plugins:id:grafana-oncall-app
logger=context userId=0 orgId=2 uname= t=2024-06-03T20:11:05.902047025+01:00 level=debug msg="Received unknown frontend metric" metric=frontend_plugin_preload_grafana-oncall-app_ms

The problem got resolved by adding the Grafana user ID and password in the .env file and docker-compose.yaml, as below.

[root@ip dock]# more .env
DOMAIN=http://localhost:8080

# Remove 'with_grafana' below if you want to use existing grafana
# Add 'with_prometheus' below to optionally enable a local prometheus for oncall metrics
# e.g. COMPOSE_PROFILES=with_grafana,with_prometheus
#COMPOSE_PROFILES=
#COMPOSE_PROFILES=with_grafana
# to setup an auth token for prometheus exporter metrics:
PROMETHEUS_EXPORTER_SECRET=my_random_prometheus_secret
# also, make sure to enable the /metrics endpoint:
FEATURE_PROMETHEUS_EXPORTER_ENABLED=True
SECRET_KEY=glsa_P2RBIFPgcwjikBOabpZHyWp79dm74nkx_54715996
GRAFANA_USER=admin
GRAFANA_PASSWORD=admin@123
#GF_SECURITY_ADMIN_USER: ${GRAFANA_USER:-admin}
#GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-admin@123}
[root@ip dock]#
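
As a sanity check that those credentials work, a basic-auth call against the Grafana API (using the GRAFANA_API_URL from the compose file below) should return the org as JSON rather than a 401:

curl -s -u admin:admin@123 http://10.131.4.136:3000/api/org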

Docker-compose.yaml as below

x-environment: &oncall-environment
DATABASE_TYPE: sqlite3
BROKER_TYPE: redis
BASE_URL: $DOMAIN
SECRET_KEY: $SECRET_KEY

SECRET_KEY: glsa_P2RBIFPgcwjikBOabpZHyWp79dm74nkx_54715996

FEATURE_PROMETHEUS_EXPORTER_ENABLED: ${FEATURE_PROMETHEUS_EXPORTER_ENABLED:-false}
PROMETHEUS_EXPORTER_SECRET: ${PROMETHEUS_EXPORTER_SECRET:-}
REDIS_URI: redis://redis:6379/0
DJANGO_SETTINGS_MODULE: settings.hobby
CELERY_WORKER_QUEUE: “default,critical,long,slack,telegram,webhook,retry,celery,grafana”
CELERY_WORKER_CONCURRENCY: “1”
CELERY_WORKER_MAX_TASKS_PER_CHILD: “100”
CELERY_WORKER_SHUTDOWN_INTERVAL: “65m”
CELERY_WORKER_BEAT_ENABLED: “True”
GRAFANA_API_URL: http://10.131.4.136:3000
GRAFANA_USER: ${GRAFANA_USER:-}
GRAFANA_PASSWORD: ${GRAFANA_PASSWORD:-}
GF_SECURITY_ADMIN_USER: ${GRAFANA_USER:-}
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD:-}
services:
engine:
image: grafana/oncall
restart: always
ports:
- “8080:8080”
command: sh -c “uwsgi --ini uwsgi.ini”
environment: *oncall-environment
volumes:
- oncall_data:/var/lib/oncall
depends_on:
oncall_db_migration:
condition: service_completed_successfully
redis:
condition: service_healthy
condition: service_healthy

oncall_db_migration:
image: grafana/oncall
command: python manage.py migrate --noinput
environment: *oncall-environment
volumes:
- oncall_data:/var/lib/oncall
depends_on:
redis:
condition: service_healthy

redis:
image: redis:7.0.5
restart: always
expose:
- 6379
volumes:
- redis_data:/data
deploy:
resources:
limits:
memory: 500m
cpus: “0.5”
healthcheck:
test: [“CMD”, “redis-cli”, “ping”]
timeout: 5s
interval: 5s
retries: 10

volumes:
grafana_data:
prometheus_data:
oncall_data:
redis_data:
[root@ip dock]#