Compare commits

...

2 commits

Author SHA1 Message Date
ab85ba751a
alertmanager: enable e2e_dead_man_switch
All checks were successful
Flake checks / Check (pull_request) Successful in 23m13s
2024-11-12 13:41:42 +01:00
a9c5edfeb3
alertmanager: don't alert on high memory page faults
This alert is non actionable, we still monitor high memory usage.
2024-11-12 13:40:46 +01:00

View file

@ -24,10 +24,10 @@ lib.mapAttrsToList
# description = "Configurations of AlertManager cluster instances are out of sync.";
# };
#alert_manager_e2e_dead_man_switch = {
# condition = "vector(1)";
# description = "Prometheus DeadManSwitch is an always-firing alert. It's used as an end-to-end test of Prometheus through the Alertmanager.";
#};
alert_manager_e2e_dead_man_switch = {
condition = "vector(1)";
description = "Prometheus DeadManSwitch is an always-firing alert. It's used as an end-to-end test of Prometheus through the Alertmanager.";
};
# prometheus_not_connected_to_alertmanager = {
# condition = "prometheus_notifications_alertmanagers_discovered < 1";
@ -234,10 +234,10 @@ lib.mapAttrsToList
};
*/
host_memory_under_memory_pressure = {
condition = "rate(node_vmstat_pgmajfault[1m]) > 1000";
description = "{{$labels.instance}}: The node is under heavy memory pressure. High rate of major page faults: {{$value}}";
};
#host_memory_under_memory_pressure = {
# condition = "rate(node_vmstat_pgmajfault[1m]) > 1000";
# description = "{{$labels.instance}}: The node is under heavy memory pressure. High rate of major page faults: {{$value}}";
#};
# ext4_errors = {
# condition = "ext4_errors_value > 0";