Hello,
I am trying to run the Docker setup from the scylladb/scylla-monitoring repository on GitHub ("Simple monitoring of Scylla with Grafana"). I had to make several changes to the templates in the documentation, but I have reached a point where I am stuck and have no idea how to proceed (even after googling, etc).
Does anyone know what plugin I’m missing here?
Docker logs for Grafana are not showing any errors (I’ve already fixed any errors/warnings it was complaining about).
My docker-compose.yml - just the Grafana bit:
# Grafana service of the Scylla monitoring stack.
# NOTE(review): this fragment is expected to sit under the compose file's
# top-level `services:` key - re-indent accordingly when pasting it back.
grafana:
  container_name: scylla_grafana
  # NOTE(review): `latest` moves with every Grafana release; consider pinning
  # the tag to a version the 4.6 dashboards were built for - TODO confirm.
  image: grafana/grafana:latest
  restart: always
  ports:
    # Published on the loopback interface only; quoted so the colon-separated
    # digits are never re-typed by the YAML parser.
    - "127.0.0.1:3000:3000"
  environment:
    # NOTE(review): the file shown for this path contains "class" placeholder
    # keys (e.g. "class": "dashboard"), which looks like an unrendered
    # scylla-monitoring template rather than a final Grafana dashboard.
    # Presumably the rendered output of the project's dashboard generation
    # step should be mounted here instead - confirm.
    - GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH=/var/lib/grafana/dashboards/ver_4.6/scylla-overview.4.6.json
    - GF_PANELS_DISABLE_SANITIZE_HTML=true
    - GF_PATHS_PROVISIONING=/var/lib/grafana/provisioning
    - GF_PLUGINS_ALLOW_LOADING_UNSIGNED_PLUGINS=scylladb-scylla-datasource
    - GF_AUTH_BASIC_ENABLED=true
    - GF_AUTH_ANONYMOUS_ENABLED=false
    # NOTE(review): presumably only takes effect if anonymous access is
    # enabled; `Admin` would then grant full rights to unauthenticated users.
    - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
    # NOTE(review): prefer injecting the secret from an env file or secret
    # store instead of committing it to the compose file.
    - GF_SECURITY_ADMIN_PASSWORD=.....................
  # uid:gid the container runs as; quoted so it is always read as a string.
  # Presumably must own the host directory mounted at /var/lib/grafana.
  user: "1000:1000"
  volumes:
    - /opt/docker/_VOLUMES_/scylla/grafana:/var/lib/grafana
    - ./plugins-bundled:/usr/share/grafana/plugins-bundled
  networks:
    # Assumes a `scylla` network is declared under the top-level `networks:`
    # key elsewhere in the compose file - not visible in this fragment.
    - scylla
And my /var/lib/grafana/dashboards/ver_4.6/scylla-overview.4.6.json
{
"class": "dashboard",
"originalTitle": "Scylla Cluster Metrics",
"overwrite": true,
"rows": [
{
"class": "row",
"panels": [
{
"collapsed": false,
"datasource": null,
"id": "auto",
"gridPos": {
"h": 1,
"w": 24
},
"panels": [],
"title": "Cluster overview $cluster",
"type": "row"
}
]
},
{
"class" : "small_stat_rows",
"panels": []
},
{
"class": "row",
"panels": [
{
"class": "alert_table",
"span": 4,
"title": "Active Alerts"
},
{
"class": "ops_panel",
"span": 2,
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
"intervalFactor": 1,
"legendFormat": "",
"refId": "A",
"step": 1
}
],
"description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
"title": "Writes"
},
{
"class": "us_panel",
"span": 2,
"targets": [
{
"expr": "avg(wlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0)",
"intervalFactor": 1,
"legendFormat": "95%",
"refId": "A",
"step": 1
},
{
"expr": "avg(wlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0)",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "B",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"title": "Write Latencies"
},
{
"class": "ops_panel",
"span": 2,
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m]))",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A",
"step": 1
}
],
"description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
"title": "Reads"
},
{
"class": "us_panel",
"span": 2,
"targets": [
{
"expr": "avg(rlatencyp95{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0)",
"intervalFactor": 1,
"legendFormat": "95%",
"refId": "A",
"step": 1
},
{
"expr": "avg(rlatencyp99{by=\"cluster\", cluster=~\"$cluster|$^\",scheduling_group_name=~\"$sg\"}>0)",
"intervalFactor": 1,
"legendFormat": "99%",
"refId": "B",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"title": "Read Latencies"
}
]
},
{
"class": "row",
"panels": [
{
"collapsed": false,
"datasource": null,
"id": "auto",
"gridPos": {
"h": 1,
"w": 24
},
"panels": [],
"title": "",
"repeat": "dc",
"type": "row"
}
]
},
{
"class": "header_row",
"panels": [
{
"class": "plain_text",
"content": "<h1 style=\"color:#5780C1; border-bottom: 3px solid #5780C1;\">Information for $dc</h1>"
}
],
"title": "New row"
},
{
"class": "row",
"panels": [
{
"class": "vertical_lcd",
"targets": [
{
"expr": "avg(scylla_reactor_utilization{instance=~\"[[node]]\",cluster=~\"$cluster\", dc=~\"$dc\", shard=~\"[[shard]]\"} )",
"interval": "",
"legendFormat": "",
"instant": true,
"refId": "A"
}
],
"title": "Load"
},
{
"class": "bytes_panel",
"gridPos": {
"w": 3
},
"targets": [
{
"expr": "Avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])-avg(node_filesystem_avail_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Avg Usage {{[[by]]}}",
"metric": "",
"refId": "A",
"step": 1
},
{
"expr": "avg(node_filesystem_size_bytes{mountpoint=\"$mount_point\", dc=~\"$dc\", instance=~\"$node\"}) by ([[by]])",
"legendFormat": "Size {{[[by]]}}",
"interval": "",
"refId": "B"
}
],
"fieldConfig": {
"defaults": {
"class": "fieldConfig_defaults",
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"fill": "dash",
"dash": [
10,
10
]
}
},
{
"id": "custom.lineWidth",
"value": 2
}
]
}
]
},
"options": {
"class":"desc_tooltip_options"
},
"description": "The average Disk usage per [[by]].\n\n The dashed line represents the total size.",
"title": "Average Disk Usage"
},
{
"class": "graph_panel_int",
"span": 2,
"targets": [
{
"expr": "$func(scylla_compaction_manager_compactions{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "",
"metric": "",
"refId": "A",
"step": 1
}
],
"description": "scylla_compaction_manager_compactions",
"title": "Running Compactions"
},
{
"class": "ops_panel",
"description": "The Hits and Misses",
"span": 3,
"targets": [
{
"expr": "$func(rate(scylla_cache_row_hits{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Hit {{[[by]]}}",
"refId": "A",
"step": 10
},
{
"expr": "$func(rate(scylla_cache_row_misses{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Misses {{[[by]]}}",
"refId": "B",
"step": 10
}
],
"legend": {
"class": "show_legend"
},
"title": "Cache Hits/Misses"
},
{
"class":"small_nodes_table",
"gridPos": {
"h": 17,
"w": 10
}
},
{
"class": "ops_panel",
"span": 3,
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "A",
"step": 1
},
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1d))",
"legendFormat": "1 Day Ago",
"interval": "",
"intervalFactor": 1,
"refId": "B",
"step": 1
},
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_write_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_write_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m] offset 1w))",
"legendFormat": "1 Week Ago",
"interval": "",
"intervalFactor": 1,
"refId": "C",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"seriesOverrides": [
{
"alias": "1 Day Ago",
"dashes": true,
"dashLength": 4
},
{
"alias": "1 Week Ago",
"dashes": true,
"dashLength": 2
}
],
"description": "Write attempts - include all writes that reached the coordinator node, even if they will eventually fail",
"title": "Writes"
},
{
"class": "us_panel",
"span": 2,
"targets": [
{
"expr": "avg(wlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by ([[by]],sg)",
"intervalFactor": 1,
"legendFormat": "95% {{[[by]]}}",
"refId": "A",
"step": 1
},
{
"expr": "avg(wlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by ([[by]],sg)",
"intervalFactor": 1,
"legendFormat": "99% {{[[by]]}}",
"refId": "B",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"title": "Write Latencies"
},
{
"class": "ops_panel",
"description": "Requests that Scylla tried to write but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
"span": 2,
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_write_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Writes {{[[by]]}}",
"refId": "A",
"step": 10
}
],
"legend": {
"class": "show_legend"
},
"title": "Write Timeouts by [[by]]"
},
{
"class": "ops_panel",
"span": 3,
"gridPos": {
"x": 0
},
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]]) or on([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A",
"step": 1
},
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1d))",
"intervalFactor": 1,
"legendFormat": "1 Day Ago",
"refId": "B",
"step": 1
},
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_read_latency_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w)) or on ([[by]]) $func(rate(scylla_storage_proxy_coordinator_read_latency_summary_count{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\",scheduling_group_name=~\"$sg\"}[1m] offset 1w))",
"intervalFactor": 1,
"legendFormat": "1 Week Ago",
"refId": "C",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"seriesOverrides": [
{
"alias": "1 Day Ago",
"dashes": true,
"dashLength": 4
},
{
"alias": "1 Week Ago",
"dashes": true,
"dashLength": 2
}
],
"description": "Read attempts - include all reads that reached the coordinator node, even if they will eventually fail",
"title": "Reads"
},
{
"class": "us_panel",
"span": 2,
"gridPos": {
"x": 6
},
"targets": [
{
"expr": "avg(rlatencyp95{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by([[by]])",
"intervalFactor": 1,
"legendFormat": "95% {{[[by]]}}",
"refId": "A",
"step": 1
},
{
"expr": "avg(rlatencyp99{by=\"[[by]]\", instance=~\"[[node]]|^$\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]|$^\",scheduling_group_name=~\"$sg\"}>0) by([[by]])",
"intervalFactor": 1,
"legendFormat": "99% {{[[by]]}}",
"refId": "B",
"step": 1
}
],
"legend": {
"class": "show_legend"
},
"title": "Read Latencies"
},
{
"class": "ops_panel",
"description": "Requests that Scylla tried to read but timed out. Timeouts are counted in the node that received the request (the coordinator), not at the replicas.",
"span": 2,
"gridPos": {
"x": 10
},
"targets": [
{
"expr": "$func(rate(scylla_storage_proxy_coordinator_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_cas_read_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])+rate(scylla_storage_proxy_coordinator_range_timeouts{instance=~\"[[node]]\",cluster=~\"$cluster|$^\", dc=~\"$dc\", shard=~\"[[shard]]\"}[1m])) by ([[by]])",
"intervalFactor": 1,
"legendFormat": "Read {{[[by]]}}",
"refId": "A",
"step": 10
}
],
"legend": {
"class": "show_legend"
},
"title": "Read Timeouts by [[by]]"
},
{
"class": "plain_text",
"dashproductreject": "no-version-check",
"gridPos": {
"w": 10,
"x": 14,
"h": 1
},
"options": {
"mode": "html",
"content": "<img src=\"https://repositories.scylladb.com/scylla/imgversion/$all_scyllas_versions/scylla\"></img>"
}
},
{
"class": "plain_text",
"dashproduct": "no-version-check",
"gridPos": {
"w": 10,
"x": 14,
"h": 1
},
"options": {
"mode": "html",
"content": ""
}
}
]
},
{
"class": "row",
"panels": [
{
"collapsed": false,
"datasource": null,
"id": "auto",
"gridPos": {
"h": 1,
"w": 24
},
"panels": [],
"title": "",
"type": "row"
}
]
},
{
"class": "header_row",
"panels": [
{
"class": "plain_text",
"content": "<h1 style=\"color:#5780C1; border-bottom: 3px solid #5780C1;\">Advisor</h1>"
}
],
"title": "New row"
},
{
"class": "row",
"panels": [
{
"class":"advisor_table",
"dashversion":">4.1"
},
{
"class":"enterprise_advisor_table",
"dashversion":">2019.1"
}
]
},
{
"class": "user_panels_collapse",
"panels": []
},
{
"class": "user_panel_row_header",
"panels": []
},
{
"class": "user_panels_row",
"panels": []
},
{
"class": "monitoring_version_row",
"panels": []
}
],
"tags": [
"4.6"
],
"templating": {
"list": [
{
"class": "by_template_var",
"current": {
"tags": [],
"text": "DC",
"value": "dc"
},
"options": [
{
"selected": false,
"text": "Cluster",
"value": "cluster"
},
{
"selected": true,
"text": "DC",
"value": "dc"
},
{
"selected": false,
"text": "Instance",
"value": "instance"
},
{
"selected": false,
"text": "instance,shard",
"value": "instance,shard"
}
]
},
{
"class": "template_variable_single",
"label": "cluster",
"name": "cluster",
"query": "label_values(scylla_reactor_utilization, cluster)"
},
{
"class": "template_variable_all",
"label": "dc",
"name": "dc",
"query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster\"}, dc)"
},
{
"class": "template_variable_all",
"label": "node",
"name": "node",
"query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\", dc=~\"$dc\"}, instance)"
},
{
"class": "template_variable_all",
"label": "shard",
"name": "shard",
"allValue":".+",
"query": "label_values(scylla_reactor_utilization{cluster=~\"$cluster|$^\"},shard)",
"sort": 3
},
{
"class": "template_variable_single",
"current": {
"text": "/var/lib/scylla",
"value": "/var/lib/scylla"
},
"label": "Mount path",
"name": "mount_point",
"query": "node_filesystem_avail_bytes",
"regex": "/mountpoint=\"([^\"]*)\".*/",
"sort": 0
},
{
"class": "template_variable_single",
"current": {
"selected": true,
"text": [
"statement"
],
"value": [
"statement"
]
},
"label": "SG",
"name": "sg",
"includeAll":true,
"multi":true,
"dashversion":[">4.3"],
"query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
"sort": 3
},
{
"class": "template_variable_single",
"dashversion":[">2019.1"],
"current": {
"selected": true,
"text": [
"sl:default"
],
"value": [
"sl:default"
]
},
"label": "SG",
"name": "sg",
"includeAll":true,
"multi":true,
"query": "label_values(rlatencyp99{cluster=~\"$cluster\", scheduling_group_name!~\"atexit|gossip|mem_compaction|memtable|streaming|background_reclaim|compaction|main|memtable_to_cache\"},scheduling_group_name)",
"sort": 3
},
{
"class": "aggregation_function"
},
{
"class": "template_variable_custom",
"current": {
"text": "4-6",
"value": "4-6"
},
"name": "dash_version",
"options": [
{
"selected": true,
"text": "4-6",
"value": "4-6"
}
],
"query": "4-6"
},
{
"class": "template_variable_all",
"hide":2,
"name": "all_scyllas_versions",
"current":{
"selected":true,
"text":[
"All"
],
"value":[
"$__all"
]
},
"query": "label_values(scylla_scylladb_current_version{cluster=~\"$cluster|$^\"}, version)"
},
{
"class": "template_variable_all",
"hide":2,
"name": "count_dc",
"definition": "query_result(count(up{job=\"scylla\"}) by (dc))",
"query": {
"query": "query_result(count(up{job=\"scylla\"}) by (dc))",
"refId": "StandardVariableQuery"
},
"regex": "/(?<dc>\\{dc=\"[^\"]+\".* \\d+) .*/"
},
{
"class": "template_variable_custom",
"current": {
"text": "4.6",
"value": "4.6"
},
"name": "scylla_version",
"options": [
{
"selected": true,
"text": "4.6",
"value": "4.6"
}
],
"query": "4.6"
},
{
"class": "monitor_version_var"
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"title": "Overview",
"uid": "overview-4-6",
"version": 1
}