3 # DESCRIPTION: Nagios plugin for checking the status of bonded network
4 # interfaces (masters and slaves) on Linux servers.
6 # AUTHOR: Trond H. Amundsen <t.h.amundsen@usit.uio.no>
10 # Copyright (C) 2009 Trond H. Amundsen
12 # This program is free software: you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful, but
18 # WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program. If not, see <http://www.gnu.org/licenses/>.
29 use Getopt::Long qw(:config no_ignore_case);
32 #---------------------------------------------------------------------
33 # Initialization and global variables
34 #---------------------------------------------------------------------
# Script initialization: output redirection, version constants, option
# parsing, line-break selection, blacklist loading and validation of
# the '--no-bonding' keyword.
36 # If we don't have a TTY, the plugin is probably run by Nagios. In
37 # that case, redirect all output to STDERR to STDOUT. Nagios ignores
39 if (! isatty(*STDOUT)) {
40 open STDERR, '>&', 'STDOUT'
41 or do { print "ERROR: Couldn't redirect STDERR to STDOUT\n"; exit 2; }
44 # Version and similar info
45 my $NAME = 'check_linux_bonding';
46 my $VERSION = '1.2.0-beta1';
47 my $AUTHOR = 'Trond H. Amundsen';
48 my $CONTACT = 't.h.amundsen@usit.uio.no';
56 # Nagios error levels reversed
65 # Options with default values
67 = ( 'timeout' => 5, # default timeout is 5 seconds
# Parse the command line into %opt. If parsing fails, print the brief
# usage text and exit with the UNKNOWN status.
# '--blacklist' stores its values through an array reference, so every
# occurrence of the option is collected.
81 GetOptions('t|timeout=i' => \$opt{timeout},
82 'h|help' => \$opt{help},
84 'V|version' => \$opt{version},
85 'b|blacklist=s' => \@{ $opt{blacklist} },
86 'n|no-bonding=s' => \$opt{no_bonding},
87 's|state' => \$opt{state},
88 'short-state' => \$opt{shortstate},
89 'linebreak=s' => \$opt{linebreak},
90 'v|verbose' => \$opt{verbose},
91 'disable-sysfs' => \$opt{disable_sysfs},
92 ) or pod2usage(-exitstatus => $E_UNKNOWN, -verbose => 0);
94 # If user requested help
96 pod2usage(-exitstatus => $E_OK, -verbose => 1);
99 # If user requested man page
101 pod2usage(-exitstatus => $E_OK, -verbose => 2);
104 # If user requested version info
105 if ($opt{'version'}) {
106 print <<"END_VERSION";
108 Copyright (C) 2009 $AUTHOR
109 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
110 This is free software: you are free to change and redistribute it.
111 There is NO WARRANTY, to the extent permitted by law.
113 Written by $AUTHOR <$CONTACT>
118 # Reports (messages) are gathered in this array
124 print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
# Default line separator: a newline when STDOUT is a terminal, an HTML
# line break otherwise.
130 my $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
132 # Line break from option
# 'REG' and 'HTML' are keywords; any other value is used verbatim as
# the separator.
133 if (defined $opt{linebreak}) {
134 if ($opt{linebreak} eq 'REG') {
137 elsif ($opt{linebreak} eq 'HTML') {
138 $linebreak = '<br/>';
141 $linebreak = $opt{linebreak};
145 # Blacklisted interfaces
# get_blacklist() is only consulted when $opt{blacklist} is defined;
# its result is dereferenced as an array.
146 my @blacklist = defined $opt{blacklist} ? @{ get_blacklist() } : ();
148 # Translate text exit codes to values
151 'warning' => $E_WARNING,
152 'critical' => $E_CRITICAL,
153 'unknown' => $E_UNKNOWN,
156 # Check syntax of '--no-bonding' option
# The value must be one of the keys of %text2exit, otherwise the plugin
# stops through unknown_error().
157 if (!exists $text2exit{$opt{no_bonding}}) {
158 unknown_error("Wrong usage of '--no-bonding' option: '"
160 . "' is not a recognized keyword");
163 #---------------------------------------------------------------------
165 #---------------------------------------------------------------------
# Store a message in the message array.
#
# Arguments:
#   $msg   - text of the report line
#   $exval - Nagios exit value (one of the $E_* levels) that belongs to
#            the message
#
# Returns the value of push, i.e. the new number of elements in
# @reports.
sub report {
    my ($msg, $exval) = @_;
    return push @reports, [ $msg, $exval ];
}
# Give an error and exit with unknown state.
#
# Arguments:
#   $msg - description of the problem; it is printed with an "ERROR: "
#          prefix, so callers should not add one themselves
#
# Does not return: the plugin terminates with the $E_UNKNOWN exit code.
sub unknown_error {
    my ($msg) = @_;
    print "ERROR: $msg\n";
    exit $E_UNKNOWN;
}
# Read the blacklist option and return the blacklisted interfaces.
#
# Every value given with '--blacklist' is either a comma-separated list
# of interface names, or the name of an existing file whose first line
# holds such a list.
#
# Returns a reference to an array of interface names. The array is
# empty when nothing was blacklisted. If a blacklist file cannot be
# opened, the problem is reported with level $E_UNKNOWN and an empty
# array reference is returned (the caller dereferences the result as an
# array, so a hash reference must never be returned here).
sub get_blacklist {
    my @specs = ();

    foreach my $black (@{ $opt{blacklist} || [] }) {
        my $spec = $black;

        # An existing file: take the list from its first line
        if (-f $black) {
            my $BL;
            if (!open $BL, '<', $black) {
                report("Couldn't open blacklist file $black: $!", $E_UNKNOWN);
                return [];
            }
            $spec = <$BL>;
            close $BL;
            next if !defined $spec;    # empty file, nothing to blacklist
            chomp $spec;
        }

        push @specs, $spec;
    }

    # Split every comma-separated list into single interface names
    my @blacklist = map { split m{,}xms, $_ } @specs;

    return \@blacklist;
}
# Read the first line of a file and return it without its trailing
# newline. Returns undef if the file is empty. Stops the plugin through
# unknown_error() if the file cannot be opened.
sub _read_first_line {
    my ($file) = @_;

    open my $FH, '<', $file
      or unknown_error("Couldn't open $file: $!");
    my $line = <$FH>;
    close $FH;

    # chomp only removes a newline; chop would eat a real character
    # if the line had none
    chomp $line if defined $line;

    return $line;
}

# Find bonding interfaces using sysfs
#
# Reads /sys/class/net/bonding_masters and, for every master listed
# there, the files below /sys/class/net/<master>.
#
# Returns a reference to a hash keyed by master name. Each value is a
# hash with the keys:
#   mode    - string of the form "mode=<number> (<name>)"
#   active  - name of the active slave (may be empty or undef)
#   primary - name of the primary slave (may be empty or undef)
#   slave   - hash of slave name => operstate
#   status  - operstate of the master itself
# An empty hash reference is returned if the masters file doesn't exist.
sub find_bonding_sysfs {
    my $sysdir       = '/sys/class/net';
    my $masters_file = "$sysdir/bonding_masters";
    my %bonding      = ();

    # Without the masters file sysfs can't tell us anything
    if (! -f $masters_file) {
        return {};
    }

    # get bonding masters
    my $masters = _read_first_line($masters_file);
    my @bonds   = defined $masters ? split(q{ }, $masters) : ();

    foreach my $bond (@bonds) {
        my $bonddir = "$sysdir/$bond";

        # get bonding mode; the file holds "<name> <number>"
        my $mode_line = _read_first_line("$bonddir/bonding/mode");
        my ($mode, $nr) = split q{ }, defined $mode_line ? $mode_line : q{};
        $bonding{$bond}{mode}
          = (defined $mode && defined $nr) ? "mode=$nr ($mode)" : 'mode=unknown';

        # get slaves
        my $slave_line = _read_first_line("$bonddir/bonding/slaves");
        my @slaves = defined $slave_line ? split(q{ }, $slave_line) : ();

        # get active and primary slave; left undefined if the file is empty
        $bonding{$bond}{active}  = _read_first_line("$bonddir/bonding/active_slave");
        $bonding{$bond}{primary} = _read_first_line("$bonddir/bonding/primary");

        # get slave status
        foreach my $slave (@slaves) {
            my $state = _read_first_line("$bonddir/slave_$slave/operstate");
            $bonding{$bond}{slave}{$slave} = defined $state ? $state : 'unknown';
        }

        # get bond state
        my $bond_state = _read_first_line("$bonddir/operstate");
        $bonding{$bond}{status} = defined $bond_state ? $bond_state : 'unknown';
    }

    return \%bonding;
}
# Find bonding interfaces using procfs (fallback, deprecated)
#
# Parses every file named bond<N> in /proc/net/bonding.
#
# Returns a reference to a hash keyed by master name. Each value is a
# hash with the keys 'mode', 'status', 'primary', 'active' and 'slave'
# (a hash of slave name => MII status), filled in as far as the
# corresponding lines are present in the file. An empty hash reference
# is returned if the directory doesn't exist.
sub find_bonding_procfs {
    my $procdir = '/proc/net/bonding';
    my %bonding = ();

    # No directory, no bonding interfaces
    return {} if ! -d $procdir;

    opendir my $DIR, $procdir
      or unknown_error("Couldn't open $procdir: $!");
    my @bonds = grep { m{\A bond\d+ \z}xms && -f "$procdir/$_" } readdir $DIR;
    closedir $DIR;

    foreach my $bond (@bonds) {

        # Name of the slave whose section is being parsed; undefined
        # while still in the part that describes the master
        my $slave = undef;

        open my $BOND, '<', "$procdir/$bond"
          or unknown_error("Couldn't open $procdir/$bond: $!");

        while (my $line = <$BOND>) {
            chomp $line;

            # get bonding mode
            if ($line =~ m{\A Bonding \s Mode: \s (.+) \z}xms) {
                $bonding{$bond}{mode} = $1;
            }
            # get slave
            elsif ($line =~ m{\A Slave \s Interface: \s (.+) \z}xms) {
                $slave = $1;
            }
            # get slave and bonding status
            elsif ($line =~ m{\A MII \s Status: \s (.+) \z}xms) {
                if (defined $slave) {
                    $bonding{$bond}{slave}{$slave} = $1;
                }
                else {
                    $bonding{$bond}{status} = $1;
                }
            }
            # get primary slave
            elsif ($line =~ m{\A Primary \s Slave: \s (.+) \z}xms) {
                $bonding{$bond}{primary} = $1;
            }
            # get active slave
            elsif ($line =~ m{\A Currently \s Active \s Slave: \s (.+) \z}xms) {
                $bonding{$bond}{active} = $1;
            }
        }

        close $BOND;
    }

    return \%bonding;
}
# Find bonding interfaces
#
# Uses sysfs unless '--disable-sysfs' was given, and falls back to
# procfs if that yields nothing. procfs is scanned at most once.
#
# Returns a reference to the hash of bonding interfaces. If no
# interface is found at all, a "No bonding interfaces found" line is
# printed and the plugin exits with the status selected by the
# '--no-bonding' option.
sub find_bonding {

    # first try sysfs, unless the user disabled it
    my $bonding = $opt{disable_sysfs} ? {} : find_bonding_sysfs();

    # nothing so far: use procfs
    if (scalar keys %{ $bonding } == 0) {
        $bonding = find_bonding_procfs();
    }

    # if no bonding interfaces found, exit
    if (scalar keys %{ $bonding } == 0) {
        print $reverse_exitcode{$text2exit{$opt{no_bonding}}}
          . ": No bonding interfaces found\n";
        exit $text2exit{$opt{no_bonding}};
    }

    return $bonding;
}
# Returns true if an interface is blacklisted
#
# Arguments:
#   $interface - name of a master or slave interface
#
# Returns 1 if the name is an element of @blacklist, 0 otherwise
# (including when no blacklist option is defined).
sub blacklisted {
    my ($interface) = @_;

    return 0 if !defined $opt{blacklist};

    # A lexical loop variable: the package variable $b belongs to sort
    foreach my $entry (@blacklist) {
        return 1 if $interface eq $entry;
    }

    return 0;
}
387 #=====================================================================
389 #=====================================================================
# Main check: walk every bonding master in name order and file reports
# about the master and its slaves.
392 my %bonding = %{ find_bonding() };
394 foreach my $b (sort keys %bonding) {
396 # If the master interface is blacklisted
397 if (blacklisted($b)) {
398 my $msg = sprintf 'Bonding interface %s [%s] is %s, but IGNORED',
399 $b, $bonding{$b}{mode}, $bonding{$b}{status};
# A master whose status is anything other than 'up' is critical
404 if ($bonding{$b}{status} ne 'up') {
405 my $msg = sprintf 'Bonding interface %s [%s] is %s',
406 $b, $bonding{$b}{mode}, $bonding{$b}{status};
407 report($msg, $E_CRITICAL);
410 my $slaves_are_up = 1; # flag
# Examine the slaves of this master in name order
413 foreach my $i (sort keys %{ $bonding{$b}{slave} }) {
415 # If the slave interface is blacklisted
416 if (blacklisted($i)) {
417 my $msg = sprintf 'Slave interface %s [member of %s] is %s, but IGNORED',
418 $i, $b, $bonding{$b}{slave}{$i};
# A slave that is not 'up' gives a warning and clears the flag
423 if ($bonding{$b}{slave}{$i} ne 'up') {
424 $slaves_are_up = 0; # not all slaves are up
425 my $msg = sprintf 'Bonding interface %s [%s]: Slave %s is %s',
426 $b, $bonding{$b}{mode}, $i, $bonding{$b}{slave}{$i};
427 report($msg, $E_WARNING);
# No slave was found down: judge the bond by its number of slaves
430 if ($slaves_are_up) {
# Map every slave name to a suffix string, empty to begin with
431 my %slave = map { $_ => q{} } keys %{ $bonding{$b}{slave} };
432 foreach my $s (keys %slave) {
433 if (defined $bonding{$b}{primary} and $bonding{$b}{primary} eq $s) {
436 if (defined $bonding{$b}{active} and $bonding{$b}{active} eq $s) {
# A bond with a single slave gives a warning
440 if (scalar keys %slave == 1) {
441 my @slaves = keys %slave;
442 my $msg = sprintf 'Bonding interface %s [%s] has only one slave (%s)',
443 $b, $bonding{$b}{mode}, $slaves[0];
444 report($msg, $E_WARNING);
# A bond without any slave is critical
446 elsif (scalar keys %slave == 0) { # FIXME: does this ever happen?
447 my $msg = sprintf 'Bonding interface %s [%s] has zero slaves!',
448 $b, $bonding{$b}{mode};
449 report($msg, $E_CRITICAL);
# Slave names in name order, each followed by its suffix string
452 my @slaves = map { $_ . $slave{$_} } sort keys %slave;
453 my $msg = sprintf 'Interface %s is %s: %s, %d slaves: %s',
454 $b, $bonding{$b}{status}, $bonding{$b}{mode},
455 scalar @slaves, join q{, }, @slaves;
# Counter variable: number of reports per Nagios level
my %nagios_level_count
  = (
     'OK'       => 0,
     'WARNING'  => 0,
     'CRITICAL' => 0,
     'UNKNOWN'  => 0,
    );

# holds only ok messages
my @ok_reports = ();

# Number of messages printed so far; decides whether a line break has
# to be printed in front of the next message
my $c = 0;

# Print the reports with the highest level first. The comparison has to
# be a three-way one (<=>); a boolean '<' never returns a negative
# value, which leaves the order unspecified.
foreach my $entry (sort { $b->[1] <=> $a->[1] } @reports) {
    my ($msg, $level) = @{ $entry };
    $nagios_level_count{$reverse_exitcode{$level}}++;

    # OK messages are held back unless verbose output was requested
    if ($level == $E_OK && !$opt{verbose}) {
        push @ok_reports, $msg;
        next;
    }

    # Prefix with nagios level if specified with option '--state'
    $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

    # Prefix with one-letter nagios level if specified with option '--short-state'
    $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

    print $linebreak if $c++ > 0;
    print $msg;
}

# Determine our exit code; critical outranks warning, which outranks
# unknown
my $exit_code = $E_OK;
if ($nagios_level_count{UNKNOWN} > 0)  { $exit_code = $E_UNKNOWN;  }
if ($nagios_level_count{WARNING} > 0)  { $exit_code = $E_WARNING;  }
if ($nagios_level_count{CRITICAL} > 0) { $exit_code = $E_CRITICAL; }

# Print the OK messages that were held back, but only if everything is OK
if ($exit_code == $E_OK && !$opt{verbose}) {
    foreach my $msg (@ok_reports) {
        # Prefix with nagios level if specified with option '--state'
        $msg = "OK: $msg" if $opt{state};

        # Prefix with one-letter nagios level if specified with option '--short-state'
        $msg = "O: $msg" if $opt{shortstate};

        print $linebreak if $c++ > 0;
        print $msg;
    }
}

print "\n";

# Exit with proper exit code
exit $exit_code;
519 # Man page created with:
521 # pod2man -s 3pm -r "`./check_linux_bonding -V | head -n 1`" -c 'Nagios plugin' check_linux_bonding check_linux_bonding.3pm
528 check_linux_bonding - Nagios plugin for checking the status of bonded
529 network interfaces (masters and slaves) on Linux servers.
533 check_linux_bonding [I<OPTION>]...
537 check_linux_bonding is a plugin for the Nagios monitoring software
538 that checks bonding interfaces on Linux. The plugin is fairly simple
539 and will report any interfaces that are down (both masters and
540 slaves). It will also alert you of bonding interfaces with only one
541 slave, since that usually points to a misconfiguration. If no bonding
542 interfaces are detected, the plugin will exit with an OK value
543 (modifiable with the C<--no-bonding> option). It is therefore safe to
544 run this plugin on all your Linux machines:
546 $ ./check_linux_bonding
547 OK: No bonding interfaces found
549 The plugin will first try to use the sysfs (/sys) filesystem to detect
550 bonding interfaces. If that does not work, i.e. the kernel or bonding
551 module is too old for the necessary files to exist, the plugin will
552 use procfs (/proc) as a fallback. The plugin supports an unlimited
553 number of bonding interfaces.
555 In the OK output, the plugin will indicate which of the slaves is
556 active with an exclamation mark C<!>, if applicable. If one of the
557 slaves is configured as primary, this is indicated with an asterisk
560 $ ./check_linux_bonding
561 Interface bond0 is UP: mode=1 (active-backup), 2 slaves: eth0*, eth1!
567 =item -b, --blacklist I<STRING> or I<FILE>
569 Blacklist one or more interfaces. The option can be specified multiple
570 times. If the argument is a file, the file is expected to contain a
571 single line with the same syntax, i.e.:
573 interface1,interface2,...
577 check_linux_bonding -b bond1 -b eth1
578 check_linux_bonding -b bond1,eth1
579 check_linux_bonding -b /etc/check_linux_bonding.black
581 =item -n, --no-bonding I<STRING>
583 This option lets you specify the return value of the plugin if no
584 bonding interfaces are found. The option expects C<ok>, C<warning>,
585 C<critical> or C<unknown> as the argument. Default is C<ok> if the
586 option is not present.
588 =item -t, --timeout I<SECONDS>
590 The number of seconds after which the plugin will abort. Default
591 timeout is 5 seconds if the option is not present.
595 Prefix each alert with its corresponding service state (i.e. warning,
596 critical etc.). This is useful in case of several alerts from the same
601 Same as the B<--state> option above, except that the state is
602 abbreviated to a single letter (W=warning, C=critical etc.).
604 =item --linebreak=I<STRING>
606 check_linux_bonding will sometimes report more than one line, e.g. if
607 there are several alerts. If the script has a TTY, it will use regular
608 linebreaks. If not (which is the case with NRPE) it will use HTML
609 linebreaks. Sometimes it can be useful to control what the plugin uses
610 as a line separator, and this option provides that control.
612 The argument is the exact string to be used as the line
613 separator. There are two exceptions, i.e. two keywords that translate
620 Regular linebreaks, i.e. "\n".
624 HTML linebreaks, i.e. "<br/>".
628 This is a rather special option that is normally not needed. The
629 default behaviour should be sufficient for most users.
633 Verbose output. Will report status on all bonding interfaces,
634 regardless of their alert state.
646 Display version info.
650 The option C<--verbose> (or C<-v>) can be specified to display all
655 This plugin depends on sysfs and falls back to procfs. Without these
656 filesystems the plugin will not find any bonding interfaces.
660 If no errors are discovered, a value of 0 (OK) is returned. An exit
661 value of 1 (WARNING) signifies one or more non-critical errors, while
662 2 (CRITICAL) signifies one or more critical errors.
664 The exit value 3 (UNKNOWN) is reserved for errors within the script,
665 or errors getting values from sysfs or procfs.
669 Written by Trond H. Amundsen <t.h.amundsen@usit.uio.no>
671 =head1 BUGS AND LIMITATIONS
673 None known at present.
675 =head1 INCOMPATIBILITIES
677 The plugin is only compatible with the Linux operating system.
679 =head1 REPORTING BUGS
681 Report bugs to <t.h.amundsen@usit.uio.no>
683 =head1 LICENSE AND COPYRIGHT
685 This program is free software: you can redistribute it and/or modify
686 it under the terms of the GNU General Public License as published by
687 the Free Software Foundation, either version 3 of the License, or (at
688 your option) any later version.
690 This program is distributed in the hope that it will be useful, but
691 WITHOUT ANY WARRANTY; without even the implied warranty of
692 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
693 General Public License for more details.
695 You should have received a copy of the GNU General Public License
696 along with this program. If not, see L<http://www.gnu.org/licenses/>.
700 L<http://folk.uio.no/trondham/software/check_linux_bonding.html>