Repository
Munin (contrib)
Last change
2020-02-01
Graph Categories
Family
contrib
Capabilities
Keywords
Language
Perl

relayd

Name

relayd - Plugin to show statistics about relayd load balancer.

Configuration

The following environment variables are used by this plugin:

  • logfile

    The file where syslog logs relayd’s action (Default: /var/log/relayd.log)

    You need an entry like this in your syslog.conf for this to work:

    !relayd
    *.* /var/log/relayd.log

    The directive:

    log updates

    is also necessary in relayd.conf.

  • configfile

    The relayd.conf configfile (Default: /usr/local/etc/relayd.conf)

Todo

* determine if the table is completely down (may be *impossible* if a partial
  downtime becomes complete between two runs)

Magic Markers

#%# family=contrib
#%# capabilities=autoconf
#! /usr/bin/perl -w

use strict;
use Munin::Plugin;

=head1 NAME

relayd - Plugin to show statistics about relayd load balancer.

=head1 CONFIGURATION

The following environment variables are used by this plugin:

=over 4

=item logfile

The file where syslog logs relayd's action (Default:
/var/log/relayd.log)

You need an entry like this in your syslog.conf for this to work:

    !relayd
    *.*                                             /var/log/relayd.log

The directive:

    log updates

is also necessary in relayd.conf.

=item configfile

The relayd.conf configfile (Default: /usr/local/etc/relayd.conf)

=back

=head1 TODO

 * determine if the table is completely down (may be *impossible* if a partial
   downtime becomes complete between two runs)

=head1 MAGIC MARKERS

 #%# family=contrib
 #%# capabilities=autoconf

=cut

# Turn a host name or address into a Munin field name.
#
# The raw value is first passed through clean_fieldname(). When that yields
# nothing but '_' (the original author notes this happens for IP addresses),
# the value is cleaned again with a 'host' prefix so the field name stays
# distinguishable.
#
# Takes the host as its only argument and returns the cleaned field name.
sub clean_host($) {
        my ($raw) = @_;

        my $field = clean_fieldname($raw);
        return $field if $field ne '_';

        return clean_fieldname('host' . $raw);
}

# Default locations; both can be overridden through the plugin environment
# variables 'logfile' and 'configfile'.
my $logfile = '/var/log/relayd.log';
my $configfile = "/usr/local/etc/relayd.conf";

need_multigraph();

$logfile    = $ENV{'logfile'}    if defined($ENV{'logfile'});
$configfile = $ENV{'configfile'} if defined($ENV{'configfile'});

# The first command line argument selects the mode; empty means a normal fetch.
my $cmd = (defined($ARGV[0])) ? $ARGV[0] : '';

# Collect every entry found inside the "table <name> { ... }" blocks of
# relayd.conf. The list drives both the config output and the value output.
my @hosts = ();
if (open(my $conf, "<", $configfile)) {
        my $content = join("", <$conf>);
        close($conf);

        # Braces are escaped: an unescaped literal '{' in a pattern is
        # deprecated on recent perls.
        while ($content =~ /^\s*table\s*<([^>]+)>\s*\{([^}]+)\}/mg) {
                my ($table, $hosts) = ($1, $2);
                print "table: $table, " if $Munin::Plugin::DEBUG;
                $hosts =~ s/#.*$//mg; # comments
                $hosts =~ s/^\s+//mg; # trim spaces before lines
                print "hosts: $hosts\n" if $Munin::Plugin::DEBUG;
                # Entries may be separated by commas, whitespace or both;
                # drop empty fragments produced by leading separators.
                push @hosts, grep { length } split(/[,\s]+/, $hosts);
        }
}
elsif ($cmd ne 'autoconf') {
        # In autoconf mode an unreadable config file must not abort here:
        # the autoconf handler performs its own readability check and
        # reports the problem as a "no (...)" answer.
        die "can't open $configfile: $!";
}

# Print a negative autoconf answer together with its reason, then stop.
# Exits with status 0: the Munin plugin guidelines expect autoconf to convey
# its verdict through the "yes" / "no (reason)" line, not the exit status.
sub fail($) {
        my $msg = shift;
        print "no ($msg)\n";
        exit(0);
}

if ($cmd eq 'config') {
        # First graph: availability percentage of each host.
        print("multigraph relayd_avail\n");
        print("graph_title Relayd host availability\n");
        print("graph_args --upper-limit 100\n");
        print("graph_vlabel % availability\n");
        print("graph_category loadbalancer\n");
        print("graph_info Ratio of time when this host was up. This is provided by relayd itself (not averaged by this plugin)\n");
        for my $host (@hosts) {
                my $clean = clean_host($host);
                print("$clean.label $host\n");
        }

        # Second graph: number of "down" transitions of each host.
        print("multigraph relayd_incidents\n");
        print("graph_title Relayd host incidents\n");
        print("graph_args --lower-limit 0\n");
        print("graph_vlabel down incidents\n");
        print("graph_category loadbalancer\n");
        print("graph_info Number of times this host went down\n");
        for my $host (@hosts) {
                my $clean = clean_host($host);
                print("$clean.type ABSOLUTE\n");
                print("$clean.label $host\n");
        }
        exit(0);
}
elsif ($cmd eq 'autoconf') {
        fail("$logfile unreadable") unless -r $logfile;
        fail("$configfile unreadable") unless -r $configfile;

        # List-form pipe open: relayctl is executed directly, without a shell.
        open(my $status, '-|', 'relayctl', 'show', 'summary')
                or fail("cannot open relayctl pipe: $!");
        () = <$status>; # necessary to avoid SIGPIPE to relayctl, which would make it fail

        # close() on a pipe also fails when the command exits non-zero; in
        # that case $! is empty and the exit status is in $?.
        close($status)
                or fail($! ? "cannot run relayctl: $!"
                           : "cannot run relayctl: exit status " . ($? >> 8));
        print "yes\n";
        exit(0);
}

# sample lines:
# Mar  8 23:05:28 rtr0 relayd[81814]: host 209.44.112.101, check http code (2000ms), state up -> down, availability 97.83%
# Mar  8 23:05:28 rtr0 relayd[81814]: host 209.44.112.96, check http code (2001ms), state up -> down, availability 98.12%
# Mar  8 23:05:31 rtr0 relayd[81813]: table hag: 1 added, 2 deleted, 0 changed, 0 killed
# Mar  8 23:05:31 rtr0 relayd[81814]: host 209.44.112.101, check http code (3ms), state down -> up, availability 97.83%
# Mar  8 23:05:31 rtr0 relayd[81814]: host 209.44.112.96, check http code (3ms), state down -> up, availability 98.12%
# Mar  8 23:05:36 rtr0 relayd[81813]: table hag: 2 added, 1 deleted, 0 changed, 0 killed
# Mar  8 23:21:58 rtr0 relayd[81814]: host 209.44.112.96, check http code (2000ms), state up -> down, availability 98.12%
# Mar  8 23:22:01 rtr0 relayd[81813]: table hag: 0 added, 1 deleted, 0 changed, 0 killed

# Per-host results gathered from the log, keyed by cleaned field name:
#   %avail - most recent availability percentage logged by relayd
#   %down  - number of transitions to state 'down' seen during this run
my (%avail, %down);

# Resume from the offset stored by the previous run; start at 0 when no
# state has been saved yet.
my $pos = undef;
($pos) = restore_state();
$pos = 0 unless defined($pos);

# $reset is returned by tail_open() but not used by this plugin.
my ($log,$reset) = tail_open($logfile,$pos);
while (my $line = <$log>) {
        next unless $line =~ /host ([^,]*), check[^,]*, state [^>]* -> ([^,]*), availability ([0-9]+\.[0-9]+)%/;

        # Copy the captures before calling anything else, so later regex
        # use cannot affect them.
        my ($name, $state, $availability) = ($1, $2, $3);
        my $host = clean_host($name);

        $down{$host} = 0 unless defined $down{$host};
        $down{$host}++ if $state eq 'down';
        # yes, we overwrite previous value and take only the recent one. be sad.
        $avail{$host} = $availability;
}

# tail_close() hands back the value that is stored for the next run
# (presumably the read offset). An offset of 0 is legitimate (e.g. an empty
# log file), so only an undefined result is treated as a failure.
$pos = tail_close($log);
warn "failed to close $logfile: $!" unless defined($pos);
save_state($pos);

# get missing availability values from relayctl, if necessary
#
# relayctl is run at most once, and only when at least one configured host
# has no availability value from the log. Values already taken from the log
# are never overwritten.
if (grep { !defined $avail{clean_host($_)} } @hosts) {
        # List-form pipe open: relayctl is executed directly, without a shell.
        open(my $status, '-|', 'relayctl', 'show', 'summary')
                or die "can't open relayctl: $!";
        while (my $line = <$status>) {
                # A name made of word characters and dots, followed by a
                # percentage, e.g. "10.0.0.1    99.98%".
                next unless $line =~ /([\w\.]+)\s+(\d+\.\d+)%/;
                my ($name, $value) = ($1, $2);
                my $h = clean_host($name);
                print "found spare value: $value for $h\n" if $Munin::Plugin::DEBUG;
                $avail{$h} = $value unless defined($avail{$h});
        }
        # close() on a pipe also fails when the command exits non-zero; in
        # that case $! is empty and the exit status is in $?.
        close($status)
                or die($! ? "can't close pipe: $!"
                          : "relayctl exited with status " . ($? >> 8));
}

# Fetch output, first graph: one availability value per configured host.
# Hosts without a (true) availability value are reported as 'NaN'.
print "multigraph relayd_avail\n";
foreach my $name (@hosts) {
        my $field = clean_host($name);
        my $value = $avail{$field} ? $avail{$field} : 'NaN';
        print "$field.value ", $value, "\n";
}

# Fetch output, second graph: number of 'down' transitions per configured
# host, 0 when none was counted.
print "multigraph relayd_incidents\n";
foreach my $name (@hosts) {
        my $field = clean_host($name);
        my $count = $down{$field} ? $down{$field} : 0;
        print "$field.value ", $count, "\n";
}