Rules

alert.rules

9.885s ago

31.29ms

Rule State Error Last Evaluation Evaluation Time
alert: InstanceDown expr: up == 0 for: 10m labels: severity: critical annotations: summary: 'Federate : {{ $labels.instance }} est invisible depuis plus de 10 minutes !' ok 9.886s ago 1.194ms
alert: OutOfMemory expr: (node_memory_MemFree_bytes + node_memory_Cached_bytes + node_memory_Buffers_bytes) / node_memory_MemTotal_bytes * 100 < 10 for: 5m labels: severity: warning annotations: summary: 'Federate : Mémoire libre de {{ $labels.instance }} à {{ humanize $value }}%.' ok 9.885s ago 1.366ms
alert: OutOfDiskSpace expr: node_filesystem_free_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100 < 10 for: 5m labels: severity: warning annotations: summary: Espace libre de {{ $labels.mountpoint }} sur {{ $labels.exported_instance }} à {{ humanize $value }}%. ok 9.884s ago 854.9us
alert: OutOfInodes expr: node_filesystem_files_free{fstype="ext4"} / node_filesystem_files{fstype="ext4"} * 100 < 10 for: 5m labels: severity: warning annotations: summary: 'Federate : Presque plus d''inodes disponibles ({{ $value }}% restant) dans {{ $labels.mountpoint }} sur {{ $labels.instance }}.' ok 9.884s ago 931.2us
alert: CpuUsage expr: (100 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 75 for: 10m labels: severity: warning annotations: summary: CPU sur {{ $labels.instance }} à {{ humanize $value }}%. ok 9.884s ago 1.26ms
alert: SystemdServiceFailed expr: node_systemd_unit_state{state="failed"} == 1 for: 10m labels: severity: warning annotations: summary: '{{ $labels.name }} a échoué sur {{ $labels.instance }}' ok 9.883s ago 8.274ms
alert: LoadUsage expr: node_load1 > 3 for: 10m labels: severity: warning annotations: summary: La charge de {{ $labels.instance }} est à {{ $value }} ! ok 9.875s ago 1.611ms
alert: AptUpdatePending expr: sum by (instance) (apt_upgrades_pending) > 5 for: 2m labels: severity: info annotations: summary: '{{ $labels.instance }} a {{ $value }} paquets en attente de mise à jour !' ok 9.874s ago 1.89ms
alert: TemperatureWarning expr: node_hwmon_temp_celsius > 45 for: 5m labels: severity: warning annotations: summary: La température de {{ $labels.instance }} est à {{ $value }}°C ({{ $labels.sensor }}) ! ok 9.872s ago 1.317ms
alert: TemperatureCritical expr: node_hwmon_temp_celsius > 55 for: 5m labels: severity: critical annotations: summary: La température de {{ $labels.instance }} est à {{ $value }}°C ({{ $labels.sensor }}) ! ok 9.871s ago 1.004ms
alert: CertificateWarning expr: ((probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) < 10 and (probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) > 2) for: 5m labels: severity: warning annotations: summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize $value }} jours ! ok 9.871s ago 2.139ms
alert: CertificateCritical expr: (probe_ssl_earliest_cert_expiry - time()) / (24 * 3600) <= 2 for: 5m labels: severity: critical annotations: summary: Le certificat SSL de {{ $labels.instance }} arrive à échéance dans {{ humanize $value }} jours ! ok 9.869s ago 1.124ms
alert: EndpointDown expr: probe_success == 0 for: 10m labels: severity: critical annotations: summary: Site {{ $labels.instance }} inaccessible ! ok 9.868s ago 914.5us
alert: EndpointError expr: (probe_http_status_code != 0 and probe_http_status_code != 200 and probe_http_status_code != 401) for: 10m labels: severity: critical annotations: summary: Site {{ $labels.instance }} retourne une erreur HTTP {{ $value }} ! ok 9.868s ago 1.517ms
alert: UnhealthyDisk expr: smartmon_device_smart_healthy < 1 for: 10m labels: severity: critical annotations: summary: Disque {{ $labels.disk }} sur {{ $labels.instance }} n'est pas en bonne santé ! ok 9.867s ago 458.6us
alert: EnvironmentalTemperature expr: environmental_temperature > 30 for: 10m labels: severity: warning annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 9.867s ago 472.6us
alert: EnvironmentalTemperature expr: environmental_temperature < 15 for: 10m labels: severity: warning annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 9.867s ago 372.8us
alert: EnvironmentalTemperature expr: environmental_temperature > 35 for: 10m labels: severity: critical annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 9.867s ago 355.4us
alert: EnvironmentalTemperature expr: environmental_temperature < 10 for: 10m labels: severity: critical annotations: summary: Température environnementale sur {{ $labels.instance }} à {{ $value }}° ! ok 9.867s ago 385.4us
alert: SmartDeviceTemperatureWarning expr: smartctl_device_temperature > 60 for: 2m labels: severity: warning annotations: description: |- Device temperature warning (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart device temperature warning (instance {{ $labels.instance }}) ok 9.867s ago 322us
alert: SmartDeviceTemperatureCritical expr: smartctl_device_temperature > 80 for: 2m labels: severity: critical annotations: description: |- Device temperature critical (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart device temperature critical (instance {{ $labels.instance }}) ok 9.867s ago 305.3us
alert: SmartCriticalWarning expr: smartctl_device_critical_warning > 0 for: 15m labels: severity: critical annotations: description: |- device has critical warning (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart critical warning (instance {{ $labels.instance }}) ok 9.867s ago 307.3us
alert: SmartMediaErrors expr: smartctl_device_media_errors > 0 for: 15m labels: severity: critical annotations: description: |- device has media errors (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart media errors (instance {{ $labels.instance }}) ok 9.867s ago 301.6us
alert: SmartNvmeWearoutIndicator expr: smartctl_device_available_spare{device=~"nvme.*"} < smartctl_device_available_spare_threshold{device=~"nvme.*"} for: 15m labels: severity: critical annotations: description: |- NVMe device is wearing out (instance {{ $labels.instance }}) VALUE = {{ $value }} LABELS = {{ $labels }} summary: Smart NVME Wearout Indicator (instance {{ $labels.instance }}) ok 9.868s ago 735.9us
alert: HostRaidArrayGotInactive expr: (node_md_state{state="inactive"} > 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"} labels: severity: critical annotations: description: |- RAID array {{ $labels.device }} is in a degraded state due to one or more disk failures. The number of spare drives is insufficient to fix the issue automatically. VALUE = {{ $value }} LABELS = {{ $labels }} summary: Host RAID array got inactive (instance {{ $labels.instance }}) ok 9.867s ago 756.2us
alert: HostRaidDiskFailure expr: (node_md_disks{state="failed"} > 0) * on (instance) group_left (nodename) node_uname_info{nodename=~".+"} for: 2m labels: severity: warning annotations: description: |- At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap VALUE = {{ $value }} LABELS = {{ $labels }} summary: Host RAID disk failure (instance {{ $labels.instance }}) ok 9.867s ago 716.2us