/etc/prometheus/alert_rules.yml > alert.rules
|
Labels |
State |
Active Since |
Value |
alertname="SystemdServiceFailed"
instance="localhost:9100"
job="node"
name="openipmi.service"
severity="warning"
state="failed"
type="forking"
|
firing |
2025-06-23 00:26:53 +0000 UTC |
1 |
Annotations |
- summary
- openipmi.service a échoué sur localhost:9100
|
alertname="SystemdServiceFailed"
instance="localhost:9100"
job="node"
name="nvmf-autoconnect.service"
severity="warning"
state="failed"
type="oneshot"
|
firing |
2025-06-23 00:26:53 +0000 UTC |
1 |
Annotations |
- summary
- nvmf-autoconnect.service a échoué sur localhost:9100
|
|
|
|
|
|
# Warning alert: fires once dht22_humidity_percent has stayed above 80 for
# 5 minutes.
alert: DHT22HighHumidity
expr: dht22_humidity_percent > 80
for: 5m
labels:
  severity: warning
annotations:
  summary: Humidité élevée détectée
  # Folded block scalar: the text contains an apostrophe and ': ', which would
  # otherwise terminate a single-quoted scalar or need escaping.
  description: >-
    L'humidité mesurée par DHT22 est supérieure à 80% depuis plus
    de 5 minutes (valeur actuelle: {{ $value }}%).
|
# Warning alert: fires once dht22_humidity_percent has stayed below 20 for
# 5 minutes.
alert: DHT22LowHumidity
expr: dht22_humidity_percent < 20
for: 5m
labels:
  severity: warning
annotations:
  summary: Humidité basse détectée
  # Folded block scalar: the text contains an apostrophe and ': ', which would
  # otherwise terminate a single-quoted scalar or need escaping.
  description: >-
    L'humidité mesurée par DHT22 est inférieure à 20% depuis plus
    de 5 minutes (valeur actuelle: {{ $value }}%).
|
|
|
|
|
|
|
|
|
# Critical alert: fires once `up == 0` has held for 10 minutes.
alert: InstanceDown
expr: up == 0
for: 10m
labels:
  severity: critical
annotations:
  # Folded scalar; line breaks below collapse to single spaces.
  summary: >-
    Federate : {{ $labels.instance }} est invisible depuis plus de
    10 minutes !
|
# Warning alert: fires once node_load1 has stayed above 3 for 10 minutes.
# NOTE(review): the threshold is an absolute load value, not scaled by CPU
# count; confirm that 3 is appropriate for every scraped host.
alert: LoadUsage
expr: node_load1 > 3
for: 10m
labels:
  severity: warning
annotations:
  summary: 'La charge de {{ $labels.instance }} est à {{ $value }} !'
|
|
|
|
# Critical alert: fires once smartctl_device_critical_warning has stayed
# above 0 for 15 minutes.
alert: SmartCriticalWarning
expr: smartctl_device_critical_warning > 0
for: 15m
labels:
  severity: critical
annotations:
  summary: 'Smart critical warning (instance {{ $labels.instance }})'
  # Literal block scalar: newlines are kept, trailing newline is stripped.
  description: |-
    device has critical warning (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
# Critical alert: fires once smartctl_device_temperature has stayed above 80
# for 2 minutes.
alert: SmartDeviceTemperatureCritical
expr: smartctl_device_temperature > 80
for: 2m
labels:
  severity: critical
annotations:
  summary: 'Smart device temperature critical (instance {{ $labels.instance }})'
  # Literal block scalar: newlines are kept, trailing newline is stripped.
  description: |-
    Device temperature critical (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
# Warning alert: fires once smartctl_device_temperature has stayed above 60
# for 2 minutes.
# NOTE(review): the condition has no upper bound, so it is also true above 80,
# where SmartDeviceTemperatureCritical matches too; confirm that the duplicate
# notification is intended or inhibited elsewhere.
alert: SmartDeviceTemperatureWarning
expr: smartctl_device_temperature > 60
for: 2m
labels:
  severity: warning
annotations:
  summary: 'Smart device temperature warning (instance {{ $labels.instance }})'
  # Literal block scalar: newlines are kept, trailing newline is stripped.
  description: |-
    Device temperature warning (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
# Critical alert: fires once smartctl_device_media_errors has stayed above 0
# for 15 minutes.
alert: SmartMediaErrors
expr: smartctl_device_media_errors > 0
for: 15m
labels:
  severity: critical
annotations:
  summary: 'Smart media errors (instance {{ $labels.instance }})'
  # Literal block scalar: newlines are kept, trailing newline is stripped.
  description: |-
    device has media errors (instance {{ $labels.instance }})
    VALUE = {{ $value }}
    LABELS = {{ $labels }}
|
|
|
|
|