nwaku-compose/monitoring/configuration/pg-exporter-queries.yml

pg_replication:
  query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
  master: true
  metrics:
    - lag:
        usage: "GAUGE"
        description: "Replication lag behind master in seconds"

pg_postmaster:
  query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
  master: true
  metrics:
    - start_time_seconds:
        usage: "GAUGE"
        description: "Time at which postmaster started"

pg_stat_user_tables:
  query: |
   SELECT
     current_database() datname,
     schemaname,
     relname,
     seq_scan,
     seq_tup_read,
     idx_scan,
     idx_tup_fetch,
     n_tup_ins,
     n_tup_upd,
     n_tup_del,
     n_tup_hot_upd,
     n_live_tup,
     n_dead_tup,
     n_mod_since_analyze,
     COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
     COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
     COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
     COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
     vacuum_count,
     autovacuum_count,
     analyze_count,
     autoanalyze_count
   FROM
     pg_stat_user_tables
  metrics:
    - datname:
        usage: "LABEL"
        description: "Name of current database"
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    - seq_scan:
        usage: "COUNTER"
        description: "Number of sequential scans initiated on this table"
    - seq_tup_read:
        usage: "COUNTER"
        description: "Number of live rows fetched by sequential scans"
    - idx_scan:
        usage: "COUNTER"
        description: "Number of index scans initiated on this table"
    - idx_tup_fetch:
        usage: "COUNTER"
        description: "Number of live rows fetched by index scans"
    - n_tup_ins:
        usage: "COUNTER"
        description: "Number of rows inserted"
    - n_tup_upd:
        usage: "COUNTER"
        description: "Number of rows updated"
    - n_tup_del:
        usage: "COUNTER"
        description: "Number of rows deleted"
    - n_tup_hot_upd:
        usage: "COUNTER"
        description: "Number of rows HOT updated (i.e., with no separate index update required)"
    - n_live_tup:
        usage: "GAUGE"
        description: "Estimated number of live rows"
    - n_dead_tup:
        usage: "GAUGE"
        description: "Estimated number of dead rows"
    - n_mod_since_analyze:
        usage: "GAUGE"
        description: "Estimated number of rows changed since last analyze"
    - last_vacuum:
        usage: "GAUGE"
        description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
    - last_autovacuum:
        usage: "GAUGE"
        description: "Last time at which this table was vacuumed by the autovacuum daemon"
    - last_analyze:
        usage: "GAUGE"
        description: "Last time at which this table was manually analyzed"
    - last_autoanalyze:
        usage: "GAUGE"
        description: "Last time at which this table was analyzed by the autovacuum daemon"
    - vacuum_count:
        usage: "COUNTER"
        description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
    - autovacuum_count:
        usage: "COUNTER"
        description: "Number of times this table has been vacuumed by the autovacuum daemon"
    - analyze_count:
        usage: "COUNTER"
        description: "Number of times this table has been manually analyzed"
    - autoanalyze_count:
        usage: "COUNTER"
        description: "Number of times this table has been analyzed by the autovacuum daemon"

pg_statio_user_tables:
  query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
  metrics:
    - datname:
        usage: "LABEL"
        description: "Name of current database"
    - schemaname:
        usage: "LABEL"
        description: "Name of the schema that this table is in"
    - relname:
        usage: "LABEL"
        description: "Name of this table"
    - heap_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table"
    - heap_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table"
    - idx_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from all indexes on this table"
    - idx_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in all indexes on this table"
    - toast_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table's TOAST table (if any)"
    - toast_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table's TOAST table (if any)"
    - tidx_blks_read:
        usage: "COUNTER"
        description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
    - tidx_blks_hit:
        usage: "COUNTER"
        description: "Number of buffer hits in this table's TOAST table indexes (if any)"

# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series
pg_stat_statements:
  query: "SELECT t2.rolname, t3.datname, queryid, calls, ( total_plan_time + total_exec_time ) / 1000 as total_time_seconds, ( min_plan_time + min_exec_time ) / 1000 as min_time_seconds, ( max_plan_time + max_exec_time ) / 1000 as max_time_seconds, ( mean_plan_time + mean_exec_time ) / 1000 as mean_time_seconds, ( stddev_plan_time + stddev_exec_time )  / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin' AND queryid IS NOT NULL"
  master: true
  metrics:
    - rolname:
        usage: "LABEL"
        description: "Name of user"
    - datname:
        usage: "LABEL"
        description: "Name of database"
    - queryid:
        usage: "LABEL"
        description: "Query ID"
    - calls:
        usage: "COUNTER"
        description: "Number of times executed"
    - total_time_seconds:
        usage: "COUNTER"
        description: "Total time spent in the statement, in milliseconds"
    - min_time_seconds:
        usage: "GAUGE"
        description: "Minimum time spent in the statement, in milliseconds"
    - max_time_seconds:
        usage: "GAUGE"
        description: "Maximum time spent in the statement, in milliseconds"
    - mean_time_seconds:
        usage: "GAUGE"
        description: "Mean time spent in the statement, in milliseconds"
    - stddev_time_seconds:
        usage: "GAUGE"
        description: "Population standard deviation of time spent in the statement, in milliseconds"
    - rows:
        usage: "COUNTER"
        description: "Total number of rows retrieved or affected by the statement"
    - shared_blks_hit:
        usage: "COUNTER"
        description: "Total number of shared block cache hits by the statement"
    - shared_blks_read:
        usage: "COUNTER"
        description: "Total number of shared blocks read by the statement"
    - shared_blks_dirtied:
        usage: "COUNTER"
        description: "Total number of shared blocks dirtied by the statement"
    - shared_blks_written:
        usage: "COUNTER"
        description: "Total number of shared blocks written by the statement"
    - local_blks_hit:
        usage: "COUNTER"
        description: "Total number of local block cache hits by the statement"
    - local_blks_read:
        usage: "COUNTER"
        description: "Total number of local blocks read by the statement"
    - local_blks_dirtied:
        usage: "COUNTER"
        description: "Total number of local blocks dirtied by the statement"
    - local_blks_written:
        usage: "COUNTER"
        description: "Total number of local blocks written by the statement"
    - temp_blks_read:
        usage: "COUNTER"
        description: "Total number of temp blocks read by the statement"
    - temp_blks_written:
        usage: "COUNTER"
        description: "Total number of temp blocks written by the statement"
    - blk_read_time_seconds:
        usage: "COUNTER"
        description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
    - blk_write_time_seconds:
        usage: "COUNTER"
        description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"

pg_process_idle:
  query: |
    WITH
      metrics AS (
        SELECT
          application_name,
          SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
          COUNT(*) AS process_idle_seconds_count
        FROM pg_stat_activity
        WHERE state = 'idle'
        GROUP BY application_name
      ),
      buckets AS (
        SELECT
          application_name,
          le,
          SUM(
            CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
              THEN 1
              ELSE 0
            END
          )::bigint AS bucket
        FROM
          pg_stat_activity,
          UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
        GROUP BY application_name, le
        ORDER BY application_name, le
      )
    SELECT
      application_name,
      process_idle_seconds_sum as seconds_sum,
      process_idle_seconds_count as seconds_count,
      ARRAY_AGG(le) AS seconds,
      ARRAY_AGG(bucket) AS seconds_bucket
    FROM metrics JOIN buckets USING (application_name)
    GROUP BY 1, 2, 3
  metrics:
    - application_name:
        usage: "LABEL"
        description: "Application Name"
    - seconds:
        usage: "HISTOGRAM"
        description: "Idle time of server processes"

pg_tb_stats:
  query: |
    select pubsubtopic, count(*) AS messages FROM (SELECT id, array_agg(pubsubtopic ORDER BY pubsubtopic) AS pubsubtopic FROM messages GROUP BY id) sub GROUP BY pubsubtopic ORDER BY pubsubtopic;
  metrics:
    - pubsubtopic:
        usage: "LABEL"
        description: "pubsubtopic"
    - messages:
        usage: "GAUGE"
        description: "Number of messages for the given pubsub topic"

pg_tb_messages:
  query: |
    SELECT
      COUNT(ID)
    FROM messages
  metrics:
    - count:
        usage: "GAUGE"
        description: "Row count in `messages` table"
Initial release of nwaku-compose 2023-08-09 10:36:03 +00:00			`pg_replication:`
			`query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"`
			`master: true`
			`metrics:`
			`- lag:`
			`usage: "GAUGE"`
			`description: "Replication lag behind master in seconds"`

			`pg_postmaster:`
			`query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"`
			`master: true`
			`metrics:`
			`- start_time_seconds:`
			`usage: "GAUGE"`
			`description: "Time at which postmaster started"`

			`pg_stat_user_tables:`
			`query: \|`
			`SELECT`
			`current_database() datname,`
			`schemaname,`
			`relname,`
			`seq_scan,`
			`seq_tup_read,`
			`idx_scan,`
			`idx_tup_fetch,`
			`n_tup_ins,`
			`n_tup_upd,`
			`n_tup_del,`
			`n_tup_hot_upd,`
			`n_live_tup,`
			`n_dead_tup,`
			`n_mod_since_analyze,`
			`COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,`
			`COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,`
			`COALESCE(last_analyze, '1970-01-01Z') as last_analyze,`
			`COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,`
			`vacuum_count,`
			`autovacuum_count,`
			`analyze_count,`
			`autoanalyze_count`
			`FROM`
			`pg_stat_user_tables`
			`metrics:`
			`- datname:`
			`usage: "LABEL"`
			`description: "Name of current database"`
			`- schemaname:`
			`usage: "LABEL"`
			`description: "Name of the schema that this table is in"`
			`- relname:`
			`usage: "LABEL"`
			`description: "Name of this table"`
			`- seq_scan:`
			`usage: "COUNTER"`
			`description: "Number of sequential scans initiated on this table"`
			`- seq_tup_read:`
			`usage: "COUNTER"`
			`description: "Number of live rows fetched by sequential scans"`
			`- idx_scan:`
			`usage: "COUNTER"`
			`description: "Number of index scans initiated on this table"`
			`- idx_tup_fetch:`
			`usage: "COUNTER"`
			`description: "Number of live rows fetched by index scans"`
			`- n_tup_ins:`
			`usage: "COUNTER"`
			`description: "Number of rows inserted"`
			`- n_tup_upd:`
			`usage: "COUNTER"`
			`description: "Number of rows updated"`
			`- n_tup_del:`
			`usage: "COUNTER"`
			`description: "Number of rows deleted"`
			`- n_tup_hot_upd:`
			`usage: "COUNTER"`
			`description: "Number of rows HOT updated (i.e., with no separate index update required)"`
			`- n_live_tup:`
			`usage: "GAUGE"`
			`description: "Estimated number of live rows"`
			`- n_dead_tup:`
			`usage: "GAUGE"`
			`description: "Estimated number of dead rows"`
			`- n_mod_since_analyze:`
			`usage: "GAUGE"`
			`description: "Estimated number of rows changed since last analyze"`
			`- last_vacuum:`
			`usage: "GAUGE"`
			`description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"`
			`- last_autovacuum:`
			`usage: "GAUGE"`
			`description: "Last time at which this table was vacuumed by the autovacuum daemon"`
			`- last_analyze:`
			`usage: "GAUGE"`
			`description: "Last time at which this table was manually analyzed"`
			`- last_autoanalyze:`
			`usage: "GAUGE"`
			`description: "Last time at which this table was analyzed by the autovacuum daemon"`
			`- vacuum_count:`
			`usage: "COUNTER"`
			`description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"`
			`- autovacuum_count:`
			`usage: "COUNTER"`
			`description: "Number of times this table has been vacuumed by the autovacuum daemon"`
			`- analyze_count:`
			`usage: "COUNTER"`
			`description: "Number of times this table has been manually analyzed"`
			`- autoanalyze_count:`
			`usage: "COUNTER"`
			`description: "Number of times this table has been analyzed by the autovacuum daemon"`

			`pg_statio_user_tables:`
			`query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"`
			`metrics:`
			`- datname:`
			`usage: "LABEL"`
			`description: "Name of current database"`
			`- schemaname:`
			`usage: "LABEL"`
			`description: "Name of the schema that this table is in"`
			`- relname:`
			`usage: "LABEL"`
			`description: "Name of this table"`
			`- heap_blks_read:`
			`usage: "COUNTER"`
			`description: "Number of disk blocks read from this table"`
			`- heap_blks_hit:`
			`usage: "COUNTER"`
			`description: "Number of buffer hits in this table"`
			`- idx_blks_read:`
			`usage: "COUNTER"`
			`description: "Number of disk blocks read from all indexes on this table"`
			`- idx_blks_hit:`
			`usage: "COUNTER"`
			`description: "Number of buffer hits in all indexes on this table"`
			`- toast_blks_read:`
			`usage: "COUNTER"`
			`description: "Number of disk blocks read from this table's TOAST table (if any)"`
			`- toast_blks_hit:`
			`usage: "COUNTER"`
			`description: "Number of buffer hits in this table's TOAST table (if any)"`
			`- tidx_blks_read:`
			`usage: "COUNTER"`
			`description: "Number of disk blocks read from this table's TOAST table indexes (if any)"`
			`- tidx_blks_hit:`
			`usage: "COUNTER"`
			`description: "Number of buffer hits in this table's TOAST table indexes (if any)"`

			`# WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series`
			`pg_stat_statements:`
			query: "SELECT t2.rolname, t3.datname, queryid, calls, ( total_plan_time + total_exec_time ) / 1000 as total_time_seconds, ( min_plan_time + min_exec_time ) / 1000 as min_time_seconds, ( max_plan_time + max_exec_time ) / 1000 as max_time_seconds, ( mean_plan_time + mean_exec_time ) / 1000 as mean_time_seconds, ( stddev_plan_time + stddev_exec_time ) / 1000 as stddev_time_seconds, rows, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, temp_blks_read, temp_blks_written, blk_read_time / 1000 as blk_read_time_seconds, blk_write_time / 1000 as blk_write_time_seconds FROM pg_stat_statements t1 JOIN pg_roles t2 ON (t1.userid=t2.oid) JOIN pg_database t3 ON (t1.dbid=t3.oid) WHERE t2.rolname != 'rdsadmin' AND queryid IS NOT NULL"
			`master: true`
			`metrics:`
			`- rolname:`
			`usage: "LABEL"`
			`description: "Name of user"`
			`- datname:`
			`usage: "LABEL"`
			`description: "Name of database"`
			`- queryid:`
			`usage: "LABEL"`
			`description: "Query ID"`
			`- calls:`
			`usage: "COUNTER"`
			`description: "Number of times executed"`
			`- total_time_seconds:`
			`usage: "COUNTER"`
			`description: "Total time spent in the statement, in milliseconds"`
			`- min_time_seconds:`
			`usage: "GAUGE"`
			`description: "Minimum time spent in the statement, in milliseconds"`
			`- max_time_seconds:`
			`usage: "GAUGE"`
			`description: "Maximum time spent in the statement, in milliseconds"`
			`- mean_time_seconds:`
			`usage: "GAUGE"`
			`description: "Mean time spent in the statement, in milliseconds"`
			`- stddev_time_seconds:`
			`usage: "GAUGE"`
			`description: "Population standard deviation of time spent in the statement, in milliseconds"`
			`- rows:`
			`usage: "COUNTER"`
			`description: "Total number of rows retrieved or affected by the statement"`
			`- shared_blks_hit:`
			`usage: "COUNTER"`
			`description: "Total number of shared block cache hits by the statement"`
			`- shared_blks_read:`
			`usage: "COUNTER"`
			`description: "Total number of shared blocks read by the statement"`
			`- shared_blks_dirtied:`
			`usage: "COUNTER"`
			`description: "Total number of shared blocks dirtied by the statement"`
			`- shared_blks_written:`
			`usage: "COUNTER"`
			`description: "Total number of shared blocks written by the statement"`
			`- local_blks_hit:`
			`usage: "COUNTER"`
			`description: "Total number of local block cache hits by the statement"`
			`- local_blks_read:`
			`usage: "COUNTER"`
			`description: "Total number of local blocks read by the statement"`
			`- local_blks_dirtied:`
			`usage: "COUNTER"`
			`description: "Total number of local blocks dirtied by the statement"`
			`- local_blks_written:`
			`usage: "COUNTER"`
			`description: "Total number of local blocks written by the statement"`
			`- temp_blks_read:`
			`usage: "COUNTER"`
			`description: "Total number of temp blocks read by the statement"`
			`- temp_blks_written:`
			`usage: "COUNTER"`
			`description: "Total number of temp blocks written by the statement"`
			`- blk_read_time_seconds:`
			`usage: "COUNTER"`
			`description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"`
			`- blk_write_time_seconds:`
			`usage: "COUNTER"`
			`description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"`

			`pg_process_idle:`
			`query: \|`
			`WITH`
			`metrics AS (`
			`SELECT`
			`application_name,`
			`SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,`
			`COUNT(*) AS process_idle_seconds_count`
			`FROM pg_stat_activity`
			`WHERE state = 'idle'`
			`GROUP BY application_name`
			`),`
			`buckets AS (`
			`SELECT`
			`application_name,`
			`le,`
			`SUM(`
			`CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le`
			`THEN 1`
			`ELSE 0`
			`END`
			`)::bigint AS bucket`
			`FROM`
			`pg_stat_activity,`
			`UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le`
			`GROUP BY application_name, le`
			`ORDER BY application_name, le`
			`)`
			`SELECT`
			`application_name,`
			`process_idle_seconds_sum as seconds_sum,`
			`process_idle_seconds_count as seconds_count,`
			`ARRAY_AGG(le) AS seconds,`
			`ARRAY_AGG(bucket) AS seconds_bucket`
			`FROM metrics JOIN buckets USING (application_name)`
			`GROUP BY 1, 2, 3`
			`metrics:`
			`- application_name:`
			`usage: "LABEL"`
			`description: "Application Name"`
			`- seconds:`
			`usage: "HISTOGRAM"`
			`description: "Idle time of server processes"`
nwaku-monitor/pg-exporter-queries: add new panel to show num msgs per shard (#43) 2024-02-02 14:12:39 +00:00
			`pg_tb_stats:`
			`query: \|`
			`select pubsubtopic, count(*) AS messages FROM (SELECT id, array_agg(pubsubtopic ORDER BY pubsubtopic) AS pubsubtopic FROM messages GROUP BY id) sub GROUP BY pubsubtopic ORDER BY pubsubtopic;`
			`metrics:`
			`- pubsubtopic:`
			`usage: "LABEL"`
			`description: "pubsubtopic"`
			`- messages:`
			`usage: "GAUGE"`
			`description: "Number of messages for the given pubsub topic"`
Add Postgres Dashboards using Prometheus source (#40) 2024-02-02 14:52:33 +00:00
			`pg_tb_messages:`
			`query: \|`
			`SELECT`
			`COUNT(ID)`
			`FROM messages`
			`metrics:`
			`- count:`
			`usage: "GAUGE"`
			description: "Row count in `messages` table"