Repository
Munin (contrib)
Last change
2021-04-06
Graph Categories
Family
auto
Capabilities
Keywords
Language
Python (3.x)
License
GPL-2.0-only
Authors

raid2

Name

raid2 - monitor software and hardware RAID and scrub status

Applicable Systems

Linux systems with mdraid, btrfs, cciss or megasasctl RAID.

Configuration

The following configuration is needed:

[raid2]
user root

Author

Kim B. Heino <b@bbbs.net>

License

GPLv2

Magic Markers

#%# family=auto
#%# capabilities=autoconf
#!/usr/bin/env python3

"""Munin plugin to monitor software and hardware RAID and scrub status.

=head1 NAME

raid2 - monitor software and hardware RAID and scrub status

=head1 APPLICABLE SYSTEMS

Linux systems with mdraid, btrfs, cciss or megasasctl RAID.

=head1 CONFIGURATION

The following configuration is needed:

    [raid2]
    user root

=head1 AUTHOR

Kim B. Heino <b@bbbs.net>

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

 #%# family=auto
 #%# capabilities=autoconf

=cut
"""

import glob
import os
import re
import subprocess
import sys


def safename(name):
    """Return a Munin-safe field name derived from ``name``.

    The root mount point ``/`` maps to ``root``. For any other input every
    character outside ASCII ``[A-Za-z0-9_]`` is replaced with ``_`` and the
    result is lower-cased. A result that would start with a digit gets a
    leading ``_``, because field names may not begin with a digit.
    """
    if name == '/':
        return 'root'
    # str.isalnum() would also accept non-ASCII letters and digits, which
    # are not valid in field names, so filter on an explicit ASCII class.
    cleaned = re.sub(r'[^A-Za-z0-9_]', '_', name).lower()
    if cleaned[:1].isdigit():
        cleaned = '_' + cleaned
    return cleaned


def run_binary(arg):
    """Execute a command and return its standard output as text.

    ``arg`` is the argument vector (program first); no shell is involved.
    Standard error is captured and thrown away. The output is decoded as
    UTF-8 (undecodable bytes are dropped) and stripped of surrounding
    whitespace. Returns None when the program cannot be started (OSError).
    """
    try:
        finished = subprocess.run(
            arg, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False)
    except OSError:
        return None
    text = finished.stdout.decode('utf-8', 'ignore')
    return text.strip()


def find_cciss():
    """Collect hardware RAID status reported by cciss_vol_status.

    Returns a list of ``(device, status, description)`` tuples, where status
    is 1 for a line containing `` status: OK`` and 0 for any other status
    line. Returns an empty list when there are no candidate device nodes,
    the tool is not installed, or it produced no output.
    """
    tool = '/usr/bin/cciss_vol_status'

    # Candidate device nodes; nothing to do without them or without the tool
    nodes = sorted(glob.glob('/dev/sg*') + glob.glob('/dev/cciss/c*d0'))
    if not (nodes and os.path.exists(tool)):
        return []

    output = run_binary([tool] + nodes)
    if not output:
        return []

    # Output lines are paired with device nodes by position; zip() stops at
    # whichever of the two runs out first.
    found = []
    for node, line in zip(nodes, output.splitlines()):
        if ' status: ' not in line:
            continue
        healthy = 1 if ' status: OK' in line else 0
        found.append((node, healthy, 'Hardware RAID device {}'.format(node)))
    return found


def find_megasasctl():
    """Report LSI hardware RAID health using /usr/sbin/megasasctl.

    Runs ``megasasctl -HB`` and treats any output as a problem report:
    status is 0 when the tool printed something and 1 when the output was
    empty. Returns a one-element list with the ``('lsi', status, desc)``
    tuple, or an empty list when the tool is missing or cannot be executed.
    """
    statexe = '/usr/sbin/megasasctl'

    # Find binary
    if not os.path.exists(statexe):
        return []

    # Run binary
    data = run_binary([statexe, '-HB'])
    if data is None:
        # The tool exists but could not be started. Reporting "OK" here
        # would hide the failure, so report no device instead.
        return []
    status = 0 if data else 1
    return [('lsi', status, 'Hardware RAID device LSI')]


def find_mdstat():
    """Parse /proc/mdstat and report the state of each md device.

    Returns a list of ``(device, status, description)`` tuples. For every
    ``mdN : ...`` header line the following line is inspected: status is 0
    when that line contains ``_`` and 1 otherwise. Returns an empty list
    when /proc/mdstat cannot be opened or read.
    """
    # Read file; the context manager closes it even if reading fails
    try:
        with open('/proc/mdstat') as fhn:
            lines = fhn.readlines()
    except OSError:
        return []

    # Parse it
    devices = []
    device = None
    for line in lines:
        if re.match(r'^md\d+ : ', line):
            device = line.split()[0]
        elif device:
            # Only the first line after the header is examined
            status = 0 if '_' in line else 1
            desc = 'Software RAID device {}'.format(device)
            devices.append((device, status, desc))
            device = None
    return devices


def _unescape_mount(path):
    """Decode the three-digit octal escapes used in /proc/mounts fields."""
    return re.sub(
        r'\\([0-7]{3})', lambda match: chr(int(match.group(1), 8)), path)


def find_btrfs():
    """Parse /proc/mounts and btrfs scrub status. Ignore csum errors.

    Returns a list of ``(mount, status, description)`` tuples, one per btrfs
    source device (first mount point listed for it). Status is 1 when every
    checked error counter in ``btrfs scrub status -R`` is 0, otherwise 0.
    Mounts without scrub statistics in the output are skipped. Returns an
    empty list when /proc/mounts cannot be opened or read.
    """
    # Read file; the context manager closes it even if reading fails
    try:
        with open('/proc/mounts') as fhn:
            lines = fhn.readlines()
    except OSError:
        return []

    # Parse it: keep the first mount point seen for each source device
    devmap = {}
    for line in lines:
        fields = line.split()
        if (len(fields) > 2 and fields[2] == 'btrfs'
                and fields[0] not in devmap):
            # Mount points containing spaces etc. are octal-escaped in
            # /proc/mounts; btrfs needs the real path.
            devmap[fields[0]] = _unescape_mount(fields[1])

    # Counters that must all be zero for a healthy result
    clean_markers = (
        'read_errors: 0',
        'verify_errors: 0',
        'super_errors: 0',
        'malloc_errors: 0',
        'uncorrectable_errors: 0',
        'unverified_errors: 0',
    )

    # Iterate devices
    devices = []
    for mount in devmap.values():
        data = run_binary(['/sbin/btrfs', 'scrub', 'status', '-R', mount])
        if not data or 'data_extents_scrubbed:' not in data:
            continue
        status = 1 if all(marker in data for marker in clean_markers) else 0
        devices.append((mount, status, 'BTRFS in {}'.format(mount)))
    return devices


def find_devices():
    """Run every detector in turn and return all device tuples found."""
    found = []
    for finder in (find_cciss, find_megasasctl, find_mdstat, find_btrfs):
        found.extend(finder())
    return found


def config(devices):
    """Print the graph configuration for the given device tuples.

    Emits the graph header, then a label and a warning range for each
    device. When MUNIN_CAP_DIRTYCONFIG is ``1`` the current values are
    printed as well.
    """
    header = (
        'graph_title RAID and Scrub Status',
        'graph_vlabel Status',
        'graph_category disk',
        'graph_info Health status: 0 = Error, 1 = OK',
        'graph_args --base 1000 --lower-limit 0 --upper-limit 1',
    )
    for line in header:
        print(line)

    for entry in devices:
        field = safename(entry[0])
        print('{}.label {}'.format(field, entry[2]))
        print('{}.warning 1:'.format(field))

    dirty = os.environ.get('MUNIN_CAP_DIRTYCONFIG')
    if dirty == '1':
        fetch(devices)


def fetch(devices):
    """Print one ``<field>.value <status>`` line per device tuple."""
    for entry in devices:
        field, status = safename(entry[0]), entry[1]
        print('{}.value {}'.format(field, status))


if __name__ == '__main__':
    # The first command-line argument, when present, selects the action;
    # anything else (or no argument) prints the current values.
    if sys.argv[1:2] == ['autoconf']:
        if find_devices():
            print('yes')
        else:
            print('no (no RAID devices found)')
    elif sys.argv[1:2] == ['config']:
        config(find_devices())
    else:
        fetch(find_devices())