| Rule |
State |
Error |
Last Evaluation |
Evaluation Time |
| alert: InstanceDown
expr: up == 0
for: 10m
labels:
severity: critical
annotations:
summary: 'Federate : {{ $labels.instance }} est invisible depuis plus de 10 minutes
!'
|
ok
|
|
40.885s ago
|
1.374ms |
| alert: OutOfMemory
expr: (node_memory_MemFree_bytes
+ node_memory_Cached_bytes + node_memory_Buffers_bytes) / node_memory_MemTotal_bytes
* 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: 'Federate : Mémoire libre de {{ $labels.instance }} à {{ humanize $value
}}%.'
|
ok
|
|
40.884s ago
|
1.814ms |
| alert: OutOfDiskSpace
expr: node_filesystem_free_bytes{fstype="ext4"}
/ node_filesystem_size_bytes{fstype="ext4"} * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: Espace libre de {{ $labels.mountpoint }} sur {{ $labels.exported_instance
}} à {{ humanize $value }}%.
|
ok
|
|
40.883s ago
|
1.167ms |
| alert: OutOfInodes
expr: node_filesystem_files_free{fstype="ext4"}
/ node_filesystem_files{fstype="ext4"} * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: 'Federate : Presque plus d''inodes disponibles ({{ $value }}% restant)
dans {{ $labels.mountpoint }} sur {{ $labels.instance }}.'
|
ok
|
|
40.882s ago
|
1.277ms |
| alert: CpuUsage
expr: (100
- avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
> 75
for: 10m
labels:
severity: warning
annotations:
summary: CPU sur {{ $labels.instance }} à {{ humanize $value }}%.
|
ok
|
|
40.881s ago
|
1.325ms |
| alert: SystemdServiceFailed
expr: node_systemd_unit_state{state="failed"}
== 1
for: 10m
labels:
severity: warning
annotations:
summary: '{{ $labels.name }} a échoué sur {{ $labels.instance }}'
|
ok
|
|
40.88s ago
|
8.854ms |
| alert: LoadUsage
expr: node_load1 > 3
for: 10m
labels:
severity: warning
annotations:
summary: La charge de {{ $labels.instance }} est à {{ $value }} !
|
ok
|
|
40.872s ago
|
946.4us |
| alert: AptUpdatePending
expr: sum
by (instance) (apt_upgrades_pending) > 5
for: 2m
labels:
severity: info
annotations:
summary: '{{ $labels.instance }} a {{ $value }} paquets en attente de mise à jour
!'
|
ok
|
|
40.871s ago
|
1.65ms |
| alert: TemperatureWarning
expr: node_hwmon_temp_celsius
> 45
for: 5m
labels:
severity: warning
annotations:
summary: La température de {{ $labels.instance }} est à {{ $value }}°C ( {{ $labels.sensor
}} ) !
|
ok
|
|
40.87s ago
|
874.4us |
| alert: TemperatureCritical
expr: node_hwmon_temp_celsius
> 55
for: 5m
labels:
severity: critical
annotations:
summary: La température de {{ $labels.instance }} est à {{ $value }}°C ( {{ $labels.sensor
}} ) !
|
ok
|
|
40.87s ago
|
983.8us |
| alert: CertificateWarning
expr: ((probe_ssl_earliest_cert_expiry
- time()) / (24 * 3600) < 10 and (probe_ssl_earliest_cert_expiry - time()) /
(24 * 3600) > 2)
for: 5m
labels:
severity: warning
annotations:
summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize
$value }} jours !
|
ok
|
|
40.869s ago
|
2.349ms |
| alert: CertificateCritical
expr: (probe_ssl_earliest_cert_expiry
- time()) / (24 * 3600) <= 2
for: 5m
labels:
severity: critical
annotations:
summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize
$value }} jours !
|
ok
|
|
40.867s ago
|
1.301ms |
| alert: EndpointDown
expr: probe_success
== 0
for: 10m
labels:
severity: critical
annotations:
summary: Site {{ $labels.instance }} inaccessible !
|
ok
|
|
40.866s ago
|
772.4us |
| alert: EndpointError
expr: (probe_http_status_code
!= 0 and probe_http_status_code != 200 and probe_http_status_code != 401)
for: 10m
labels:
severity: critical
annotations:
summary: Site {{ $labels.instance }} retourne une erreur HTTP {{ $value }} !
|
ok
|
|
40.866s ago
|
2.163ms |
| alert: UnhealthyDisk
expr: smartmon_device_smart_healthy
< 1
for: 10m
labels:
severity: critical
annotations:
summary: Disque {{ $labels.disk }} sur {{ $labels.instance }} n'est pas en bonne
santé !
|
ok
|
|
40.864s ago
|
597.7us |
| alert: EnvironmentalTemperature
expr: dht22_temperature_c
>= 25
for: 10m
labels:
severity: warning
annotations:
summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° !
|
ok
|
|
40.864s ago
|
700.6us |
| alert: EnvironmentalTemperature
expr: dht22_temperature_c
<= 15
for: 10m
labels:
severity: warning
annotations:
summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° !
|
ok
|
|
40.864s ago
|
587us |
| alert: EnvironmentalTemperature
expr: dht22_temperature_c
>= 35
for: 10m
labels:
severity: critical
annotations:
summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° !
|
ok
|
|
40.864s ago
|
749.7us |
| alert: EnvironmentalTemperature
expr: dht22_temperature_c
<= 10
for: 10m
labels:
severity: critical
annotations:
summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° !
|
ok
|
|
40.863s ago
|
561.5us |
| alert: DHT22HighHumidity
expr: dht22_humidity_percent
> 80
for: 5m
labels:
severity: warning
annotations:
description: 'L''humidité mesurée par DHT22 est supérieure à 80% depuis plus
de 5 minutes (valeur actuelle: {{ $value }}%).'
summary: Humidité élevée détectée
|
ok
|
|
40.863s ago
|
548.7us |
| alert: DHT22LowHumidity
expr: dht22_humidity_percent
< 20
for: 5m
labels:
severity: warning
annotations:
description: 'L''humidité mesurée par DHT22 est inférieure à 20% depuis plus
de 5 minutes (valeur actuelle: {{ $value }}%).'
summary: Humidité basse détectée
|
ok
|
|
40.863s ago
|
541.9us |
| alert: SmartDeviceTemperatureWarning
expr: smartctl_device_temperature
> 60
for: 2m
labels:
severity: warning
annotations:
description: |-
Device temperature warning (instance {{ $labels.instance }})
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Smart device temperature warning (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
519.5us |
| alert: SmartDeviceTemperatureCritical
expr: smartctl_device_temperature
> 80
for: 2m
labels:
severity: critical
annotations:
description: |-
Device temperature critical (instance {{ $labels.instance }})
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Smart device temperature critical (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
498.9us |
| alert: SmartCriticalWarning
expr: smartctl_device_critical_warning
> 0
for: 15m
labels:
severity: critical
annotations:
description: |-
device has critical warning (instance {{ $labels.instance }})
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Smart critical warning (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
574.2us |
| alert: SmartMediaErrors
expr: smartctl_device_media_errors
> 0
for: 15m
labels:
severity: critical
annotations:
description: |-
device has media errors (instance {{ $labels.instance }})
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Smart media errors (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
538.9us |
| alert: SmartNvmeWearoutIndicator
expr: smartctl_device_available_spare{device=~"nvme.*"}
< smartctl_device_available_spare_threshold{device=~"nvme.*"}
for: 15m
labels:
severity: critical
annotations:
description: |-
NVMe device is wearing out (instance {{ $labels.instance }})
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Smart NVME Wearout Indicator (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
896.4us |
| alert: HostRaidArrayGotInactive
expr: (node_md_state{state="inactive"}
> 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"}
labels:
severity: critical
annotations:
description: |-
RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically.
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Host RAID array got inactive (instance {{ $labels.instance }})
|
ok
|
|
40.863s ago
|
1.136ms |
| alert: HostRaidDiskFailure
expr: (node_md_disks{state="failed"}
> 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"}
for: 2m
labels:
severity: warning
annotations:
description: |-
At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap
VALUE = {{ $value }}
LABELS = {{ $labels }}
summary: Host RAID disk failure (instance {{ $labels.instance }})
|
ok
|
|
40.862s ago
|
1.193ms |