Rules

alert.rules

53.537s ago

15.39ms

Rule State Error Last Evaluation Evaluation Time
alert: InstanceDown expr: up == 0 for: 10m labels: severity: critical annotations: summary: 'Federate : {{ $labels.instance }} est invisible depuis plus de 10 minutes !' ok 53.545s ago 714.7us
alert: OutOfMemory expr: (node_memory_MemFree_bytes + node_memory_Cached_bytes + node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 < 10 for: 5m labels: severity: warning annotations: summary: 'Federate : Mémoire libre de {{ $labels.instance }} à {{ humanize $value }}%.' ok 53.545s ago 789.7us
alert: OutOfDiskSpace expr: node_filesystem_free_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100 < 10 for: 5m labels: severity: warning annotations: summary: Espace libre de {{ $labels.mountpoint }} sur {{ $labels.exported_instance }} à {{ humanize $value }}%. ok 53.544s ago 552.4us
alert: OutOfInodes expr: node_filesystem_files_free{fstype="ext4"} / node_filesystem_files{fstype="ext4"} * 100 < 10 for: 5m labels: severity: warning annotations: summary: "Federate : Presque plus d'inodes disponibles ({{ $value }}% restant) dans {{ $labels.mountpoint }} sur {{ $labels.instance }}." ok 53.544s ago 428.5us
alert: CpuUsage expr: (100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 75 for: 10m labels: severity: warning annotations: summary: CPU sur {{ $labels.instance }} à {{ humanize $value }}%. ok 53.544s ago 557.2us
alert: SystemdServiceFailed expr: node_systemd_unit_state{state="failed"} == 1 for: 10m labels: severity: warning annotations: summary: '{{ $labels.name }} a échoué sur {{ $labels.instance }}' ok 53.544s ago 3.757ms
alert: LoadUsage expr: node_load1 > 3 for: 10m labels: severity: warning annotations: summary: La charge de {{ $labels.instance }} est à {{ $value }} ! ok 53.54s ago 441us
alert: AptUpdatePending expr: sum by (instance) (apt_upgrades_pending) > 5 for: 2m labels: severity: info annotations: summary: '{{ $labels.instance }} a {{ $value }} paquets en attente de mise à jour !' ok 53.54s ago 332.5us
alert: TemperatureWarning expr: node_hwmon_temp_celsius > 45 for: 5m labels: severity: warning annotations: summary: La température de {{ $labels.instance }} est à {{ $value }}°C ({{ $labels.sensor }}) ! ok 53.54s ago 279.8us
alert: TemperatureCritical expr: node_hwmon_temp_celsius > 55 for: 5m labels: severity: critical annotations: summary: La température de {{ $labels.instance }} est à {{ $value }}°C ({{ $labels.sensor }}) ! ok 53.54s ago 224us
alert: CertificateWarning expr: ((probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) < 10 and (probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) > 2) for: 5m labels: severity: warning annotations: summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize $value }} jours ! ok 53.54s ago 953us
alert: CertificateCritical expr: (probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) <= 2 for: 5m labels: severity: critical annotations: summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize $value }} jours ! ok 53.539s ago 494.2us
alert: EndpointDown expr: probe_success == 0 for: 10m labels: severity: critical annotations: summary: Site {{ $labels.instance }} inaccessible ! ok 53.539s ago 320.2us
alert: EndpointError expr: (probe_http_status_code != 0 and probe_http_status_code != 200 and probe_http_status_code != 401) for: 10m labels: severity: critical annotations: summary: Site {{ $labels.instance }} retourne une erreur HTTP {{ $value }} ! ok 53.541s ago 705.7us
alert: UnhealthyDisk expr: smartmon_device_smart_healthy < 1 for: 10m labels: severity: critical annotations: summary: Disque {{ $labels.disk }} sur {{ $labels.instance }} n'est pas en bonne santé ! ok 53.541s ago 276.3us
alert: EnvironmentalTemperature expr: dht22_temperature_c >= 25 for: 10m labels: severity: warning annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 53.541s ago 231.2us
alert: EnvironmentalTemperature expr: dht22_temperature_c <= 15 for: 10m labels: severity: warning annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 53.541s ago 221.2us
alert: EnvironmentalTemperature expr: dht22_temperature_c >= 35 for: 10m labels: severity: critical annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 53.541s ago 202.1us
alert: EnvironmentalTemperature expr: dht22_temperature_c <= 10 for: 10m labels: severity: critical annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 53.541s ago 212.5us
alert: DHT22HighHumidity expr: dht22_humidity_percent > 80 for: 5m labels: severity: warning annotations: description: "L'humidité mesurée par DHT22 est supérieure à 80% depuis plus de 5 minutes (valeur actuelle: {{ $value }}%)." summary: Humidité élevée détectée ok 53.541s ago 205.9us
alert: DHT22LowHumidity expr: dht22_humidity_percent < 20 for: 5m labels: severity: warning annotations: description: "L'humidité mesurée par DHT22 est inférieure à 20% depuis plus de 5 minutes (valeur actuelle: {{ $value }}%)." summary: Humidité basse détectée ok 53.541s ago 207.7us
alert: SmartDeviceTemperatureWarning expr: smartctl_device_temperature > 60 for: 2m labels: severity: warning annotations: description: |- Device temperature warning (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart device temperature warning (instance {{ $labels.instance }}) ok 53.541s ago 182.9us
alert: SmartDeviceTemperatureCritical expr: smartctl_device_temperature > 80 for: 2m labels: severity: critical annotations: description: |- Device temperature critical (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart device temperature critical (instance {{ $labels.instance }}) ok 53.541s ago 177.8us
alert: SmartCriticalWarning expr: smartctl_device_critical_warning > 0 for: 15m labels: severity: critical annotations: description: |- device has critical warning (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart critical warning (instance {{ $labels.instance }}) ok 53.541s ago 180.9us
alert: SmartMediaErrors expr: smartctl_device_media_errors > 0 for: 15m labels: severity: critical annotations: description: |- device has media errors (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart media errors (instance {{ $labels.instance }}) ok 53.541s ago 383.8us
alert: SmartNvmeWearoutIndicator expr: smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"} for: 15m labels: severity: critical annotations: description: |- NVMe device is wearing out (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart NVME Wearout Indicator (instance {{ $labels.instance }}) ok 53.541s ago 792.6us
alert: HostRaidArrayGotInactive expr: (node_md_state{state="inactive"} > 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"} labels: severity: critical annotations: description: |- RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically. VALUE = {{ $value }} LABELS = {{ $labels }} summary: Host RAID array got inactive (instance {{ $labels.instance }}) ok 53.54s ago 708.5us
alert: HostRaidDiskFailure expr: (node_md_disks{state="failed"} > 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 2m labels: severity: warning annotations: description: |- At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap VALUE = {{ $value }} LABELS = {{ $labels }} summary: Host RAID disk failure (instance {{ $labels.instance }}) ok 53.54s ago 609.3us