Data model advice for GitHub Actions workflows

I apologize for the delayed response here. I missed the notification. The table information is exactly what I’m after, but I need to be able to plot it. I have a working prototype in Python that uses the HTTP API.

def format_query_string_for_q(params=None, reverse_name_map=None):
    """Build an attribute-filter expression like '{k="v" && k2=~"v2"}'.

    Parameters
    ----------
    params : dict | None
        Mapping of attribute name -> value. Falsy values are skipped.
        A string value produces a single comparison; any other iterable
        produces an OR-group with one comparison per element.
    reverse_name_map : dict | None
        Optional renaming applied to each key before formatting
        (pretty name -> raw attribute name).

    Returns
    -------
    str
        The assembled filter expression; '{}' when nothing matched.
    """
    # Attributes matched with regex (=~) rather than exact equality.
    regex_keys = {'name', 'rapids.labels'}
    parts = []
    for key, value in (params or {}).items():
        if reverse_name_map and key in reverse_name_map:
            key = reverse_name_map[key]
        if not value:
            # Skip empty/None filter values entirely.
            continue
        if isinstance(value, (str, bytes)):
            op = '=~' if key in regex_keys else '='
            parts.append(f'{key}{op}"{value}"')
        else:
            # Non-string iterable: match any of the listed values.
            parts.append("(" + " || ".join(f'{key}="{item}"' for item in value) + ")")
    return "{" + " && ".join(parts) + "}"

def retrieve_data(base_url, start=-5, end=None, params=None, reverse_name_map=None):
    """Query the tracing service's search API and return one row per span.

    Parameters
    ----------
    base_url : str
        Root URL of the tracing service; 'api/search' is joined onto it.
    start : int | datetime.datetime
        Either an absolute start time, or an int offset in days relative
        to `end` (e.g. -5 means "5 days before `end`").
    end : datetime.datetime | None
        End of the query window; defaults to "now" at call time.
        (The previous `end=datetime.datetime.now()` default was evaluated
        once at import time, silently freezing the window.)
    params, reverse_name_map : dict | None
        Forwarded to format_query_string_for_q to build the trace filter.

    Returns
    -------
    pandas.DataFrame
        Columns: traceID, startTimeUnixNano, durationMs, spanID,
        spanStartTime, spanDurationM (span duration in minutes).
    """
    if end is None:
        end = datetime.datetime.now()
    if isinstance(start, int):
        # Interpret an int `start` as a day offset relative to `end`.
        start = end + datetime.timedelta(start)

    values_string = format_query_string_for_q(params=params, reverse_name_map=reverse_name_map)
    query_params = {
        "limit": 100,   # Number of traces (each has multiple spans)
        "spss": 1000,   # Spans per span set (roughly one span set per trace)
        "start": int(start.timestamp()),
        "end": int(end.timestamp()),
        "q": values_string,
    }
    # NOTE(review): client_cert / client_key / ca look like module-level
    # Path objects configured elsewhere in the file — confirm before reuse.
    response = requests.get(urljoin(base_url, "api/search"),
                            params=query_params,
                            cert=(client_cert.as_posix(), client_key.as_posix()),
                            verify=ca.as_posix())
    if response.status_code >= 400:
        # Best-effort diagnostics; json_normalize below will raise if the
        # payload lacks the expected 'traces' key.
        print(response)
    df = pd.json_normalize(response.json()['traces'])
    try:
        # One row per span: trace-level columns alongside exploded span fields.
        expanded = pd.concat(
            [df.filter(["traceID", "startTimeUnixNano", "durationMs"]),
             df.explode("spanSet.spans")['spanSet.spans'].apply(pd.Series).rename(
                 columns={"startTimeUnixNano": "spanStartTime",
                          "durationNanos": "spanDurationM"})],
            axis=1)
        # durationNanos -> minutes; non-numeric values coerce to 0.
        expanded['spanDurationM'] = (pd.to_numeric(expanded['spanDurationM'], errors='coerce').fillna(0) / 1E9 / 60)
        expanded['spanStartTime'] = pd.to_datetime(pd.to_numeric(expanded["spanStartTime"]) / 1E9, unit='s')
        # Cut off jobs with delay times longer than 10 hours (600 minutes).
        expanded = expanded.query('spanDurationM < 600')
    except KeyError:
        # Dump the raw frame to help debug an unexpected response shape.
        print(df)
        raise
    return expanded.filter(["traceID", "startTimeUnixNano", "durationMs",
                            "spanID", "spanStartTime", "spanDurationM"])

This feeds into a plot/dashboard tool that allows us to filter on different resource attributes that we set. Each point in this plot represents one span that matches the filters. One trace may contribute more than one point in this plot. Each row of widgets is a “dimension,” and selecting more than one value for a dimension creates a separate series for each combination of all dimensions.

It would still be desirable to do this kind of plot in Grafana, because a unified view of the individual trace viewer and other dashboard creation tools would be nicer than a Python dashboard that we have to host separately.