]> git.uio.no Git - check_linux_bonding.git/blame - check_linux_bonding
* version 1.2.0-beta1
[check_linux_bonding.git] / check_linux_bonding
CommitLineData
01da8424 1#!/usr/bin/perl
2#
3# DESCRIPTION: Nagios plugin for checking the status of bonded network
4# interfaces (masters and slaves) on Linux servers.
5#
6# AUTHOR: Trond H. Amundsen <t.h.amundsen@usit.uio.no>
7#
8# $Id$
9#
10# Copyright (C) 2009 Trond H. Amundsen
11#
12# This program is free software: you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation, either version 3 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program. If not, see <http://www.gnu.org/licenses/>.
24#
25
26use strict;
27use warnings;
28use POSIX qw(isatty);
29use Getopt::Long qw(:config no_ignore_case);
30use Pod::Usage;
31
32#---------------------------------------------------------------------
33# Initialization and global variables
34#---------------------------------------------------------------------
35
# Without a TTY the plugin is probably being run by Nagios, which
# ignores output to STDERR. In that case, redirect everything written
# to STDERR onto STDOUT so that it is not lost.
if (!isatty(*STDOUT)) {
    if (!open(STDERR, '>&', 'STDOUT')) {
        print "ERROR: Couldn't redirect STDERR to STDOUT\n";
        exit 2;
    }
}
43
# Version and similar info
my $NAME    = 'check_linux_bonding';
my $VERSION = '1.2.0-beta1';
my $AUTHOR  = 'Trond H. Amundsen';
my $CONTACT = 't.h.amundsen@usit.uio.no';

# Exit codes
my $E_OK       = 0;
my $E_WARNING  = 1;
my $E_CRITICAL = 2;
my $E_UNKNOWN  = 3;

# Nagios error levels reversed (exit code => level name)
my %reverse_exitcode = (
    0 => 'OK',
    1 => 'WARNING',
    2 => 'CRITICAL',
    3 => 'UNKNOWN',
);

# Options with default values. The keys are the ones the option
# parser stores into and the rest of the script reads from.
my %opt = (
    'timeout'       => 5,        # default timeout is 5 seconds
    'help'          => 0,
    'man'           => 0,
    'version'       => 0,
    'blacklist'     => [],
    'no_bonding'    => 'ok',
    'state'         => 0,
    'shortstate'    => 0,        # was 'short-state', a key nothing ever read
    'linebreak'     => undef,
    'verbose'       => 0,
    'disable_sysfs' => 0,
);
79
# Get options. Results are stored in %opt; repeated --blacklist
# arguments accumulate in the array behind $opt{blacklist}. On a
# parse error the usage text is shown and the plugin exits UNKNOWN.
GetOptions(
    't|timeout=i'    => \$opt{timeout},
    'h|help'         => \$opt{help},
    'm|man'          => \$opt{man},         # -m is the documented short form
    'V|version'      => \$opt{version},
    'b|blacklist=s'  => \@{ $opt{blacklist} },
    'n|no-bonding=s' => \$opt{no_bonding},
    's|state'        => \$opt{state},
    'short-state'    => \$opt{shortstate},
    'linebreak=s'    => \$opt{linebreak},
    'v|verbose'      => \$opt{verbose},
    'disable-sysfs'  => \$opt{disable_sysfs},
) or pod2usage(-exitstatus => $E_UNKNOWN, -verbose => 0);
93
# If user requested help
pod2usage(-exitstatus => $E_OK, -verbose => 1) if $opt{'help'};

# If user requested man page
pod2usage(-exitstatus => $E_OK, -verbose => 2) if $opt{'man'};

# If user requested version info: print the banner and exit OK
if ($opt{'version'}) {
    my @banner = (
        "$NAME $VERSION",
        "Copyright (C) 2009 $AUTHOR",
        'License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>',
        'This is free software: you are free to change and redistribute it.',
        'There is NO WARRANTY, to the extent permitted by law.',
        q{},
        "Written by $AUTHOR <$CONTACT>",
    );
    print join("\n", @banner), "\n";
    exit $E_OK;
}
117
# Reports (messages) are gathered in this array, one
# [ message, exit value ] pair per entry
my @reports = ();

# Setting timeout: when the alarm fires, say so and exit UNKNOWN
$SIG{ALRM} = sub {
    print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
    exit $E_UNKNOWN;
};
alarm $opt{timeout};

# Default line break: a real newline on a TTY, an HTML break otherwise
my $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';

# Line break from option. REG and HTML are keywords, anything else is
# used verbatim as the separator.
if (defined $opt{linebreak}) {
    my %keyword = (
        'REG'  => "\n",
        'HTML' => '<br/>',
    );
    $linebreak
      = exists $keyword{ $opt{linebreak} }
      ? $keyword{ $opt{linebreak} }
      : $opt{linebreak};
}
144
# Blacklisted interfaces
my @blacklist = ();
if (defined $opt{blacklist}) {
    @blacklist = @{ get_blacklist() };
}

# Translate text exit codes to values
my %text2exit = (
    'ok'       => $E_OK,
    'warning'  => $E_WARNING,
    'critical' => $E_CRITICAL,
    'unknown'  => $E_UNKNOWN,
);

# Check syntax of '--no-bonding' option: only the keywords above are
# accepted
exists $text2exit{ $opt{no_bonding} }
  or unknown_error(
    "Wrong usage of '--no-bonding' option: '$opt{no_bonding}' is not a recognized keyword"
  );
162
01da8424 163#---------------------------------------------------------------------
164# Functions
165#---------------------------------------------------------------------
166
#
# Store a message in the message array, together with its exit value.
# Returns the number of stored reports after the addition.
#
sub report {
    my $msg   = shift;
    my $exval = shift;

    push @reports, [ $msg, $exval ];
    return scalar @reports;
}
174
#
# Print an error message prefixed with "ERROR: " and exit with
# unknown state. Does not return.
#
sub unknown_error {
    my ($msg) = @_;
    print 'ERROR: ' . $msg . "\n";
    exit $E_UNKNOWN;
}
183
#
# Read the blacklist option and return a reference to an array
# containing the blacklisted interface names.
#
# Each value given to the blacklist option is either a comma-separated
# list of names, or the name of an existing file whose first line
# holds such a list. If a blacklist file can't be opened, an UNKNOWN
# report is stored and an empty list is returned.
#
sub get_blacklist {
    my @names = ();

  ENTRY:
    foreach my $entry (@{ $opt{blacklist} }) {
        my $list = $entry;

        if (-f $entry) {
            my $BL;
            if (!open $BL, '<', $entry) {
                # report() takes (message, exit value), and the caller
                # dereferences our return value as an array
                report("Couldn't open blacklist file $entry: $!", $E_UNKNOWN);
                return [];
            }
            $list = <$BL>;
            close $BL;

            # an empty file blacklists nothing
            next ENTRY if !defined $list;
            chomp $list;
        }

        # Parse blacklist string; empty fields can never name an interface
        push @names, grep { length } split m{,}xms, $list;
    }

    return \@names;
}
219
#
# Return the first line of a file with the trailing newline removed,
# or undef if the file is empty. Exits with unknown state if the file
# can't be opened.
#
sub _read_first_line {
    my $file = shift;

    open my $FH, '<', $file
      or unknown_error("Couldn't open $file: $!");
    my $line = <$FH>;
    close $FH;

    # chomp only strips a newline; chop would eat a real character
    # if the line had none
    chomp $line if defined $line;
    return $line;
}

#
# Find bonding interfaces using sysfs
#
# Returns a reference to a hash keyed on bond name. Each value is a
# hash with the keys 'mode', 'active', 'primary' and 'status', plus
# 'slave' (slave name => operstate) when the bond has slaves. Returns
# a reference to an empty hash if the bonding_masters file doesn't
# exist. Exits with unknown state if an expected file can't be opened.
#
sub find_bonding_sysfs {
    my $sysdir       = '/sys/class/net';
    my $masters_file = "$sysdir/bonding_masters";
    my %bonding      = ();

    if (! -f $masters_file) {
        return {};
    }

    # get bonding masters. Splitting on ' ' ignores leading whitespace,
    # so an empty bond name is never produced.
    my $masters = _read_first_line($masters_file);
    my @bonds   = defined $masters ? split(' ', $masters) : ();

    foreach my $bond (@bonds) {
        my $bonddir = "$sysdir/$bond";

        # get bonding mode, read as two whitespace-separated fields
        my $modeline = _read_first_line("$bonddir/bonding/mode");
        my ($mode, $nr) = defined $modeline ? split(' ', $modeline) : ();
        $bonding{$bond}{mode}
          = (defined $mode && defined $nr) ? "mode=$nr ($mode)" : 'unknown';

        # get slaves
        my $slaveline = _read_first_line("$bonddir/bonding/slaves");
        my @slaves    = defined $slaveline ? split(' ', $slaveline) : ();

        # get active slave (may be undef or empty)
        $bonding{$bond}{active}
          = _read_first_line("$bonddir/bonding/active_slave");

        # get primary slave (may be undef or empty)
        $bonding{$bond}{primary}
          = _read_first_line("$bonddir/bonding/primary");

        # get slave status
        foreach my $slave (@slaves) {
            my $state = _read_first_line("$bonddir/slave_$slave/operstate");
            $bonding{$bond}{slave}{$slave}
              = defined $state ? $state : 'unknown';
        }

        # get bond state
        my $bstate = _read_first_line("$bonddir/operstate");
        $bonding{$bond}{status} = defined $bstate ? $bstate : 'unknown';
    }

    return \%bonding;
}
290
291
#
# Find bonding interfaces using procfs (fallback, deprecated)
#
# Parses the files named bond<N> in /proc/net/bonding. Returns a
# reference to a hash keyed on bond name, with the keys 'mode',
# 'status', 'primary', 'active' and 'slave' (slave name => MII status)
# filled in as far as the file provides them. Returns a reference to
# an empty hash if the directory can't be read or holds no bond files.
#
sub find_bonding_procfs {
    my $procdir = '/proc/net/bonding';
    my %bonding = ();

    # Without the directory there is nothing to parse. Bail out here
    # instead of calling readdir/closedir on an invalid handle.
    opendir(my $DIR, $procdir)
      or return {};
    my @bonds = grep { m{\A bond\d+ \z}xms && -f "$procdir/$_" } readdir $DIR;
    closedir $DIR;

    return {} if !@bonds;

    foreach my $bond (@bonds) {
        # name of the slave section we are in; undef while still in
        # the bond's own section at the top of the file
        my $slave = undef;

        open my $BOND, '<', "$procdir/$bond"
          or unknown_error("Couldn't open $procdir/$bond: $!");
        while (my $line = <$BOND>) {
            chomp $line;

            # get bonding mode
            if ($line =~ m{\A Bonding \s Mode: \s (.*) \z}xms) {
                $bonding{$bond}{mode} = $1;
            }
            # get slave
            elsif ($line =~ m{\A Slave \s Interface: \s (.*) \z}xms) {
                $slave = $1;
            }
            # get slave and bonding status
            elsif ($line =~ m{\A MII \s Status: \s (.*) \z}xms) {
                if (defined $slave) {
                    $bonding{$bond}{slave}{$slave} = $1;
                }
                else {
                    $bonding{$bond}{status} = $1;
                }
            }
            # get primary slave
            elsif ($line =~ m{\A Primary \s Slave: \s (.*) \z}xms) {
                $bonding{$bond}{primary} = $1;
            }
            # get active slave
            elsif ($line =~ m{\A Currently \s Active \s Slave: \s (.*) \z}xms) {
                $bonding{$bond}{active} = $1;
            }
        }
        close $BOND;
    }

    return \%bonding;
}
343
#
# Find bonding interfaces
#
# Uses sysfs first and procfs as fallback, or procfs only if sysfs is
# disabled by option. If nothing is found, prints a message and exits
# with the state selected by the '--no-bonding' option. Otherwise
# returns a reference to the hash of bonding interfaces.
#
sub find_bonding {
    my $found
      = $opt{disable_sysfs}
      ? find_bonding_procfs()
      : find_bonding_sysfs();

    # sysfs gave nothing: second try with procfs
    if (!$opt{disable_sysfs} && !%{ $found }) {
        $found = find_bonding_procfs();
    }

    # if no bonding interfaces found, exit
    if (!%{ $found }) {
        my $exitval = $text2exit{ $opt{no_bonding} };
        print $reverse_exitcode{$exitval} . ": No bonding interfaces found\n";
        exit $exitval;
    }

    return $found;
}
372
#
# Returns true (1) if an interface is blacklisted, false (0) otherwise
#
sub blacklisted {
    my $iface = shift;

    return 0 if !defined $opt{blacklist};

    # A lexical loop variable: $b is a package global reserved for
    # sort comparators and shouldn't double as an iterator
    foreach my $name (@blacklist) {
        return 1 if $iface eq $name;
    }
    return 0;
}
01da8424 386
387#=====================================================================
388# Main program
389#=====================================================================
390
391
my %bonding = %{ find_bonding() };

# Examine each bonding master in name order and store one or more
# reports for it
MASTER:
foreach my $bond (sort keys %bonding) {
    my $info   = $bonding{$bond};
    my $mode   = $info->{mode};
    my $status = $info->{status};

    # If the master interface is blacklisted
    if (blacklisted($bond)) {
        report(
            sprintf('Bonding interface %s [%s] is %s, but IGNORED',
                    $bond, $mode, $status),
            $E_OK
        );
        next MASTER;
    }

    # A master that isn't up is critical; its slaves are not examined
    if ($status ne 'up') {
        report(
            sprintf('Bonding interface %s [%s] is %s',
                    $bond, $mode, $status),
            $E_CRITICAL
        );
        next MASTER;
    }

    my $slaves_are_up = 1;    # flag

  SLAVE:
    foreach my $member (sort keys %{ $info->{slave} }) {
        my $state = $info->{slave}{$member};

        # If the slave interface is blacklisted
        if (blacklisted($member)) {
            report(
                sprintf('Slave interface %s [member of %s] is %s, but IGNORED',
                        $member, $bond, $state),
                $E_OK
            );
            next SLAVE;
        }

        next SLAVE if $state eq 'up';

        $slaves_are_up = 0;    # not all slaves are up
        report(
            sprintf('Bonding interface %s [%s]: Slave %s is %s',
                    $bond, $mode, $member, $state),
            $E_WARNING
        );
    }

    # Slave problems have already been reported; no summary line then
    next MASTER if !$slaves_are_up;

    my @members = sort keys %{ $info->{slave} };

    if (@members == 1) {
        report(
            sprintf('Bonding interface %s [%s] has only one slave (%s)',
                    $bond, $mode, $members[0]),
            $E_WARNING
        );
    }
    elsif (@members == 0) {    # FIXME: does this ever happen?
        report(
            sprintf('Bonding interface %s [%s] has zero slaves!',
                    $bond, $mode),
            $E_CRITICAL
        );
    }
    else {
        # Tag the primary slave with '*' and the active slave with '!'
        my @tagged = ();
        foreach my $member (@members) {
            my $flags = q{};
            if (defined $info->{primary} and $info->{primary} eq $member) {
                $flags .= '*';
            }
            if (defined $info->{active} and $info->{active} eq $member) {
                $flags .= '!';
            }
            push @tagged, $member . $flags;
        }
        report(
            sprintf('Interface %s is %s: %s, %d slaves: %s',
                    $bond, $status, $mode,
                    scalar @tagged, join(q{, }, @tagged)),
            $E_OK
        );
    }
}
461
# Counter variable: number of reports seen per nagios level
my %nagios_level_count = (
    'OK'       => 0,
    'WARNING'  => 0,
    'CRITICAL' => 0,
    'UNKNOWN'  => 0,
);

# holds only ok messages
my @ok_reports = ();

# Number of lines printed so far; decides when a line break is needed
my $c = 0;

# Print the alerts, highest exit value first. The comparator must
# return a negative, zero or positive number, hence <=> and not <.
ALERT:
foreach my $entry (sort { $b->[1] <=> $a->[1] } @reports) {
    my ($msg, $level) = @{ $entry };
    $nagios_level_count{$reverse_exitcode{$level}}++;

    # Unless verbose, OK messages are held back for later
    if ($level == $E_OK && !$opt{verbose}) {
        push @ok_reports, $msg;
        next ALERT;
    }

    # Prefix with nagios level if specified with option '--state'
    $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

    # Prefix with one-letter nagios level if specified with option '--short-state'
    $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

    # Line break between messages, none before the first
    print $linebreak if $c++ > 0;
    print $msg;
}
493
# Determine our exit code. CRITICAL takes precedence over WARNING,
# which takes precedence over UNKNOWN; otherwise the result is OK.
my $exit_code
  = $nagios_level_count{CRITICAL} > 0 ? $E_CRITICAL
  : $nagios_level_count{WARNING}  > 0 ? $E_WARNING
  : $nagios_level_count{UNKNOWN}  > 0 ? $E_UNKNOWN
  :                                     $E_OK;

# Print OK messages
if ($exit_code == $E_OK && !$opt{verbose}) {

    # '--state' gives the inner "OK: " prefix and '--short-state' the
    # outer "O: " prefix
    my $prefix = ($opt{shortstate} ? 'O: ' : q{})
               . ($opt{state}      ? 'OK: ' : q{});

    foreach my $ok (@ok_reports) {
        print $linebreak if $c++ != 0;
        print $prefix . $ok;
    }
}

print "\n";

# Exit with proper exit code
exit $exit_code;
517
518
519# Man page created with:
520#
521# pod2man -s 3pm -r "`./check_linux_bonding -V | head -n 1`" -c 'Nagios plugin' check_linux_bonding check_linux_bonding.3pm
522#
523
524__END__
525
526=head1 NAME
527
528check_linux_bonding - Nagios plugin for checking the status of bonded
529network interfaces (masters and slaves) on Linux servers.
530
531=head1 SYNOPSIS
532
533check_linux_bonding [I<OPTION>]...
534
535=head1 DESCRIPTION
536
537check_linux_bonding is a plugin for the Nagios monitoring software
538that checks bonding interfaces on Linux. The plugin is fairly simple
539and will report any interfaces that are down (both masters and
540slaves). It will also alert you of bonding interfaces with only one
541slave, since that usually points to a misconfiguration. If no bonding
a07d1559 542interfaces are detected, the plugin will exit with an OK value
543(modifiable with the C<--no-bonding> option). It is therefore safe to
544run this plugin on all your Linux machines:
01da8424 545
546 $ ./check_linux_bonding
a07d1559 547 OK: No bonding interfaces found
01da8424 548
549The plugin will first try to use the sysfs (/sys) filesystem to detect
550bonding interfaces. If that does not work, i.e. the kernel or bonding
551module is too old for the necessary files to exist, the plugin will
552use procfs (/proc) as a fallback. The plugin supports an unlimited
553number of bonding interfaces.
554
555In the OK output, the plugin will indicate which of the slaves is
556active with an exclamation mark C<!>, if applicable. If one of the
557slaves is configured as primary, this is indicated with an asterisk
558C<*>:
559
560 $ ./check_linux_bonding
561 Interface bond0 is UP: mode=1 (active-backup), 2 slaves: eth0*, eth1!
562
563=head1 OPTIONS
564
565=over 4
566
a07d1559 567=item -b, --blacklist I<STRING> or I<FILE>
568
569Blacklist one or more interfaces. The option can be specified multiple
570times. If the argument is a file, the file is expected to contain a
571single line with the same syntax, i.e.:
572
573 interface1,interface2,...
574
575Examples:
576
577 check_linux_bonding -b bond1 -b eth1
578 check_linux_bonding -b bond1,eth1
579 check_linux_bonding -b /etc/check_linux_bonding.black
580
581=item -n, --no-bonding I<STRING>
582
583This option lets you specify the return value of the plugin if no
584bonding interfaces are found. The option expects C<ok>, C<warning>,
585C<critical> or C<unknown> as the argument. Default is C<ok> if the
586option is not present.
587
01da8424 588=item -t, --timeout I<SECONDS>
589
590The number of seconds after which the plugin will abort. Default
591timeout is 5 seconds if the option is not present.
592
a07d1559 593=item -s, --state
01da8424 594
595Prefix each alert with its corresponding service state (i.e. warning,
596critical etc.). This is useful in case of several alerts from the same
597monitored system.
598
599=item --short-state
600
601Same as the B<--state> option above, except that the state is
602abbreviated to a single letter (W=warning, C=critical etc.).
603
604=item --linebreak=I<STRING>
605
606check_linux_bonding will sometimes report more than one line, e.g. if
607there are several alerts. If the script has a TTY, it will use regular
608linebreaks. If not (which is the case with NRPE) it will use HTML
609linebreaks. Sometimes it can be useful to control what the plugin uses
610as a line separator, and this option provides that control.
611
612The argument is the exact string to be used as the line
separator. There are two exceptions, i.e. two keywords that translate
614to the following:
615
616=over 4
617
618=item B<REG>
619
620Regular linebreaks, i.e. "\n".
621
622=item B<HTML>
623
624HTML linebreaks, i.e. "<br/>".
625
626=back
627
628This is a rather special option that is normally not needed. The
629default behaviour should be sufficient for most users.
630
631=item -v, --verbose
632
633Verbose output. Will report status on all bonding interfaces,
634regardless of their alert state.
635
636=item -h, --help
637
638Display help text.
639
640=item -m, --man
641
642Display man page.
643
644=item -V, --version
645
Display version info.

=back

648=head1 DIAGNOSTICS
649
650The option C<--verbose> (or C<-v>) can be specified to display all
651bonding interfaces.
652
653=head1 DEPENDENCIES
654
This plugin depends on sysfs and falls back to procfs. Without these
656filesystems the plugin will not find any bonding interfaces.
657
658=head1 EXIT STATUS
659
660If no errors are discovered, a value of 0 (OK) is returned. An exit
661value of 1 (WARNING) signifies one or more non-critical errors, while
6622 (CRITICAL) signifies one or more critical errors.
663
664The exit value 3 (UNKNOWN) is reserved for errors within the script,
or errors getting values from sysfs or procfs.
666
667=head1 AUTHOR
668
669Written by Trond H. Amundsen <t.h.amundsen@usit.uio.no>
670
671=head1 BUGS AND LIMITATIONS
672
673None known at present.
674
675=head1 INCOMPATIBILITIES
676
677The plugin is only compatible with the Linux operating system.
678
679=head1 REPORTING BUGS
680
681Report bugs to <t.h.amundsen@usit.uio.no>
682
683=head1 LICENSE AND COPYRIGHT
684
685This program is free software: you can redistribute it and/or modify
686it under the terms of the GNU General Public License as published by
687the Free Software Foundation, either version 3 of the License, or (at
688your option) any later version.
689
690This program is distributed in the hope that it will be useful, but
691WITHOUT ANY WARRANTY; without even the implied warranty of
692MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
693General Public License for more details.
694
695You should have received a copy of the GNU General Public License
696along with this program. If not, see L<http://www.gnu.org/licenses/>.
697
698=head1 SEE ALSO
699
700L<http://folk.uio.no/trondham/software/check_linux_bonding.html>
701
702=cut