]> git.uio.no Git - check_openmanage.git/blame - check_openmanage
finpussing
[check_openmanage.git] / check_openmanage
CommitLineData
669797e1 1#!/usr/bin/perl
2#
3# Nagios plugin
4#
5# Monitor Dell server hardware status using Dell OpenManage Server
6# Administrator, either locally via NRPE, or remotely via SNMP.
7#
8# $Id$
9#
10# Copyright (C) 2009 Trond H. Amundsen
11#
12# This program is free software: you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation, either version 3 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program. If not, see <http://www.gnu.org/licenses/>.
24#
25
26require 5.006; # Perl v5.6.0 or newer is required
27use strict;
28use warnings;
29use POSIX qw(isatty ceil);
30use Getopt::Long qw(:config no_ignore_case);
31
32# Global (package) variables used throughout the code
33use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
34 $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
35 $snmp_session $snmp_error $omreport $globalstatus $global
36 $linebreak $omopt_chassis $omopt_system $blade
37 $exit_code $snmp
38 %check %opt %perfdata %reverse_exitcode %status2nagios
39 %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
40 %blacklist %nagios_alert_count %count
41 @controllers @enclosures
42 @report_storage @report_chassis @report_other
43 );
44
45#---------------------------------------------------------------------
46# Initialization and global variables
47#---------------------------------------------------------------------
48
# When STDOUT is not a TTY the plugin is most likely being run by
# Nagios, which ignores anything written to STDERR. In that case send
# STDERR to STDOUT so that no output is lost.
if (!isatty(*STDOUT)) {
    if (!open(STDERR, '>&', 'STDOUT')) {
        print "ERROR: Couldn't redirect STDERR to STDOUT\n";
        exit 2;
    }
}
56
57# Version and similar info
58$NAME = 'check_openmanage';
98b224a3 59$VERSION = '3.5.0-beta13';
669797e1 60$AUTHOR = 'Trond H. Amundsen';
61$CONTACT = 't.h.amundsen@usit.uio.no';
62
63# Exit codes
64$E_OK = 0;
65$E_WARNING = 1;
66$E_CRITICAL = 2;
67$E_UNKNOWN = 3;
68
69# Firmware update lock file [FIXME: location on Windows?]
70$FW_LOCK = '/var/lock/.spsetup'; # default on Linux
71
72# Usage text
73$USAGE = <<"END_USAGE";
74Usage: $NAME [OPTION]...
75END_USAGE
76
77# Help text
78$HELP = <<'END_HELP';
79
80GENERAL OPTIONS:
81
82 -p, --perfdata Output performance data
83 -t, --timeout Plugin timeout in seconds
84 -c, --critical Customise temperature critical limits
85 -w, --warning Customise temperature warning limits
86 -d, --debug Debug output, reports everything
87 -h, --help Display this help text
88 -V, --version Display version info
89
90SNMP OPTIONS:
91
92 -H, --hostname Hostname or IP of the server (needed for SNMP)
93 -C, --community SNMP community string
94 -P, --protocol SNMP protocol version
95 --port SNMP port number
96
97OUTPUT OPTIONS:
98
99 -i, --info Prefix any alerts with the service tag
100 -e, --extinfo Append system info to alerts
101 -s, --state Prefix alerts with alert state
102 --short-state Prefix alerts with alert state (abbreviated)
103 -o, --okinfo Verbosity when check result is OK
104 --htmlinfo HTML output with clickable links
105
106CHECK CONTROL AND BLACKLISTING:
107
108 -a, --all Check everything, even log content
109 -b, --blacklist Blacklist missing and/or failed components
110 --only Only check a certain component or alert type
111 --check Fine-tune which components are checked
112
113For more information and advanced options, see the manual page or URL:
114 http://folk.uio.no/trondham/software/check_openmanage.html
115END_HELP
116
117# Version and license text
118$LICENSE = <<"END_LICENSE";
119$NAME $VERSION
120Copyright (C) 2009 $AUTHOR
121License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
122This is free software: you are free to change and redistribute it.
123There is NO WARRANTY, to the extent permitted by law.
124
125Written by $AUTHOR <$CONTACT>
126END_LICENSE
127
# Options with default values. Every key here must be the same key
# that GetOptions() stores into, otherwise the default is never seen.
%opt = ( 'blacklist'    => [],
         'check'        => [],
         'critical'     => [],
         'warning'      => [],
         'timeout'      => 30,        # default timeout is 30 seconds
         'debug'        => 0,
         'help'         => 0,
         'perfdata'     => undef,
         'info'         => 0,
         'extinfo'      => 0,
         'htmlinfo'     => undef,
         'postmsg'      => undef,
         'state'        => 0,
         'shortstate'   => 0,         # set by --short-state
         'okinfo'       => 0,         # default "ok" output level
         'linebreak'    => undef,
         'version'      => 0,
         'all'          => 0,
         'only'         => undef,
         'omreport'     => undef,
         'port'         => 161,       # default SNMP port
         'hostname'     => undef,
         'community'    => 'public',  # SNMP v1 or v2c
         'protocol'     => 2,
         'username'     => undef,     # SNMP v3
         'authpassword' => undef,     # SNMP v3
         'authkey'      => undef,     # SNMP v3
         'authprotocol' => undef,     # SNMP v3
         'privpassword' => undef,     # SNMP v3
         'privkey'      => undef,     # SNMP v3
         'privprotocol' => undef,     # SNMP v3
       );
161
# Get options. On a parse error the usage text is printed and the
# plugin exits with status UNKNOWN.
#
# The help text documents "--okinfo", so that spelling is accepted as
# an alias for the original "--ok-info".
GetOptions('b|blacklist=s'       => \@{ $opt{blacklist} },
           'check=s'             => \@{ $opt{check} },
           'c|critical=s'        => \@{ $opt{critical} },
           'w|warning=s'         => \@{ $opt{warning} },
           't|timeout=i'         => \$opt{timeout},
           'd|debug'             => \$opt{debug},
           'h|help'              => \$opt{help},
           'V|version'           => \$opt{version},
           'p|perfdata:s'        => \$opt{perfdata},
           'i|info'              => \$opt{info},
           'e|extinfo'           => \$opt{extinfo},
           'htmlinfo:s'          => \$opt{htmlinfo},
           'postmsg=s'           => \$opt{postmsg},
           's|state'             => \$opt{state},
           'short-state'         => \$opt{shortstate},
           'o|ok-info|okinfo=i'  => \$opt{okinfo},
           'l|linebreak=s'       => \$opt{linebreak},
           'a|all'               => \$opt{all},
           'only=s'              => \$opt{only},
           'omreport=s'          => \$opt{omreport},
           'port=i'              => \$opt{port},
           'H|hostname=s'        => \$opt{hostname},
           'C|community=s'       => \$opt{community},
           'P|protocol=i'        => \$opt{protocol},
           'U|username=s'        => \$opt{username},
           'authpassword=s'      => \$opt{authpassword},
           'authkey=s'           => \$opt{authkey},
           'authprotocol=s'      => \$opt{authprotocol},
           'privpassword=s'      => \$opt{privpassword},
           'privkey=s'           => \$opt{privkey},
           'privprotocol=s'      => \$opt{privprotocol},
          ) or do { print $USAGE; exit $E_UNKNOWN };
195
196# If user requested help
197if ($opt{help}) {
198 print $USAGE, $HELP;
199 exit $E_OK;
200}
201
202# If user requested version info
203if ($opt{version}) {
204 print $LICENSE;
205 exit $E_OK;
206}
207
208# Setting timeout
209$SIG{ALRM} = sub {
210 print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
211 exit $E_UNKNOWN;
212};
213alarm $opt{timeout};
214
215# If we're using SNMP
216$snmp = defined $opt{hostname} ? 1 : 0;
217
218# SNMP session variables
219$snmp_session = undef;
220$snmp_error = undef;
221
222# The omreport command
223$omreport = undef;
224
225# Check flags, override available with the --check option
226%check = ( 'storage' => 1, # check storage subsystem
227 'memory' => 1, # check memory (dimms)
228 'fans' => 1, # check fan status
229 'power' => 1, # check power supplies
230 'temp' => 1, # check temperature
231 'cpu' => 1, # check processors
232 'voltage' => 1, # check voltage
233 'batteries' => 1, # check battery probes
234 'amperage' => 1, # check power consumption
235 'intrusion' => 1, # check intrusion detection
236 'alertlog' => 0, # check the alert log
237 'esmlog' => 0, # check the ESM log (hardware log)
238 'esmhealth' => 1, # check the ESM log overall health
239 );
240
241# Default line break
242$linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
243
244# Line break from option
245if (defined $opt{linebreak}) {
246 if ($opt{linebreak} eq 'REG') {
247 $linebreak = "\n";
248 }
249 elsif ($opt{linebreak} eq 'HTML') {
250 $linebreak = '<br/>';
251 }
252 else {
253 $linebreak = $opt{linebreak};
254 }
255}
256
257# Exit with status=UNKNOWN if there is firmware upgrade in progress
258if (!$snmp && -f $FW_LOCK) {
259 print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
260 exit $E_UNKNOWN;
261}
262
263# List of controllers and enclosures
264@controllers = (); # controllers
265@enclosures = (); # enclosures
266
267# Messages
268@report_storage = (); # messages with associated nagios level (storage)
269@report_chassis = (); # messages with associated nagios level (chassis)
270@report_other = (); # messages with associated nagios level (other)
271
272# Counters for everything
273%count
274 = (
275 'pdisk' => 0, # number of physical disks
276 'vdisk' => 0, # number of logical drives (virtual disks)
277 'temp' => 0, # number of temperature probes
278 'volt' => 0, # number of voltage probes
279 'amp' => 0, # number of amperage probes
280 'intr' => 0, # number of intrusion probes
281 'dimm' => 0, # number of memory modules
282 'fan' => 0, # number of fan probes
283 'cpu' => 0, # number of CPUs
284 'bat' => 0, # number of batteries
285 'power' => 0, # number of power supplies
286 'esm' => {
287 'Critical' => 0, # critical entries in ESM log
288 'Non-Critical' => 0, # warning entries in ESM log
289 'Ok' => 0, # ok entries in ESM log
290 },
291 'alert' => {
292 'Critical' => 0, # critical entries in alert log
293 'Non-Critical' => 0, # warning entries in alert log
294 'Ok' => 0, # ok entries in alert log
295 },
296 );
297
298# Performance data
299%perfdata = ();
300
301# Global health status
302$global = 1; # default is to check global status
303$globalstatus = $E_OK; # default global health status is "OK"
304
305# Nagios error levels reversed
306%reverse_exitcode
307 = (
308 $E_OK => 'OK',
309 $E_WARNING => 'WARNING',
310 $E_CRITICAL => 'CRITICAL',
311 $E_UNKNOWN => 'UNKNOWN',
312 );
313
314# OpenManage (omreport) and SNMP error levels
315%status2nagios
316 = (
317 'Unknown' => $E_CRITICAL,
318 'Critical' => $E_CRITICAL,
319 'Non-Critical' => $E_WARNING,
320 'Ok' => $E_OK,
321 'Non-Recoverable' => $E_CRITICAL,
322 'Other' => $E_CRITICAL,
323 );
324
325# Status via SNMP
326%snmp_status
327 = (
328 1 => 'Other',
329 2 => 'Unknown',
330 3 => 'Ok',
331 4 => 'Non-Critical',
332 5 => 'Critical',
333 6 => 'Non-Recoverable',
334 );
335
336# Probe Status via SNMP
337%snmp_probestatus
338 = (
339 1 => 'Other', # probe status is not one of the following:
340 2 => 'Unknown', # probe status is unknown (not known or monitored)
341 3 => 'Ok', # probe is reporting a value within the thresholds
342 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold
343 5 => 'criticalUpper', # probe has crossed upper critical threshold
344 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold
345 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold
346 8 => 'criticalLower', # probe has crossed lower critical threshold
347 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold
348 10 => 'failed', # probe is not functional
349 );
350
351# Probe status translated to Nagios alarm levels
352%probestatus2nagios
353 = (
354 'Other' => $E_CRITICAL,
355 'Unknown' => $E_CRITICAL,
356 'Ok' => $E_OK,
357 'nonCriticalUpper' => $E_WARNING,
358 'criticalUpper' => $E_CRITICAL,
359 'nonRecoverableUpper' => $E_CRITICAL,
360 'nonCriticalLower' => $E_WARNING,
361 'criticalLower' => $E_CRITICAL,
362 'nonRecoverableLower' => $E_CRITICAL,
363 'failed' => $E_CRITICAL,
364 );
365
366# System information gathered
367%sysinfo
368 = (
369 'bios' => 'N/A', # BIOS version
370 'biosdate' => 'N/A', # BIOS release date
371 'serial' => 'N/A', # serial number (service tag)
372 'model' => 'N/A', # system model
373 'osname' => 'N/A', # OS name
374 'osver' => 'N/A', # OS version
375 'om' => 'N/A', # OMSA version
376 'bmc' => 0, # HAS baseboard management controller (BMC)
377 'rac' => 0, # HAS remote access controller (RAC)
378 'rac_name' => 'N/A', # remote access controller (RAC)
379 'bmc_fw' => 'N/A', # BMC firmware
380 'rac_fw' => 'N/A', # RAC firmware
381 );
382
383# Adjust which checks to perform
384adjust_checks() if defined $opt{check};
385
386# Blacklisted components
387%blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
388
389# If blacklisting is in effect, don't check global health status
390if (scalar keys %blacklist > 0) {
391 $global = 0;
392}
393
394# Take into account new hardware and blades
395$omopt_chassis = 'chassis'; # default "chassis" option to omreport
396$omopt_system = 'system'; # default "system" option to omreport
397$blade = 0; # if this is a blade system
398
399# Some initializations and checking before we begin
400if ($snmp) {
401 snmp_initialize(); # initialize SNMP
402 snmp_check(); # check that SNMP works
403 snmp_detect_blade(); # detect blade via SNMP
404}
405else {
406 # Find the omreport binary
407 find_omreport();
408 # Check help output from omreport, see which options are available.
409 # Also detecting blade via omreport.
410 check_omreport_options();
411}
412
413
414#---------------------------------------------------------------------
415# Helper functions
416#---------------------------------------------------------------------
417
#
# Queue a message for later output. The message is appended to the
# report list selected by $type ('storage', 'chassis' or 'other'),
# together with its exit value and an optional component id (empty
# string when omitted). Returns the value of push(), i.e. the new
# number of entries in that list.
#
sub report {
    my ($type, $msg, $exval, $id) = @_;
    $id = q{} if !defined $id;

    # Report list for each message type
    my %list_for
      = (
         'storage' => \@report_storage,
         'chassis' => \@report_chassis,
         'other'   => \@report_other,
        );

    my @entry = ($msg, $exval, $id);
    return push @{ $list_for{$type} }, \@entry;
}
434
435
#
# Run a command and collect its output. Returns a reference to an
# array holding the output lines. If the command can't be started an
# UNKNOWN message is reported and a reference to an empty array is
# returned; if closing the pipe fails an UNKNOWN message is reported
# and the lines read so far are still returned.
#
sub run_command {
    my $command = shift;
    my $pipe;

    if (!open $pipe, '-|', $command) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return [];
    }

    my @lines = <$pipe>;

    if (!close $pipe) {
        report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN);
    }

    return \@lines;
}
452
#
# Run a command and return everything it prints as one string. If the
# command can't be started an UNKNOWN message is reported and nothing
# is returned.
#
sub slurp_command {
    my $command = shift;
    my $pipe;

    if (!open $pipe, '-|', $command) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return;
    }

    my $rawtext;
    {
        local $/ = undef;    # slurp mode, limited to this block
        $rawtext = <$pipe>;
    }

    # NOTE: The return value of close() is deliberately not checked,
    # since omreport does something weird sometimes.
    close $pipe;

    return $rawtext;
}
469
#
# Initialize SNMP
#
# Builds the parameter list for Net::SNMP->session() from the port,
# hostname, protocol and credential options, then opens the session.
# On success the session object is stored in the global $snmp_session.
# On any error (missing username for v3, unknown priv/auth protocol,
# unknown SNMP version, missing Net::SNMP module, or a failed session)
# a message is printed and the plugin exits with status UNKNOWN.
#
sub snmp_initialize {
    # Legal SNMP v3 protocols. The alternatives are grouped so that
    # the \A and \z anchors apply to each of them; without the group
    # the anchors only bind to the first and last alternative and
    # any string merely containing e.g. "aes" would be accepted.
    my $snmp_v3_privprotocol = qr{\A (?:des|aes|aes128|3des|3desde) \z}xms;
    my $snmp_v3_authprotocol = qr{\A (?:md5|sha) \z}xms;

    # Parameters to Net::SNMP->session()
    my %param
      = (
         '-port'     => $opt{port},
         '-hostname' => $opt{hostname},
         '-version'  => $opt{protocol},
        );

    # Parameters for SNMP v3
    if ($opt{protocol} == 3) {

        # Username is mandatory
        if (!defined $opt{username}) {
            print "SNMP ERROR: With SNMPv3 the username must be specified\n";
            exit $E_UNKNOWN;
        }
        $param{'-username'} = $opt{username};

        # Passwords and keys are optional and are passed on unchanged
        foreach my $key (qw(authpassword authkey privpassword privkey)) {
            if (defined $opt{$key}) {
                $param{"-$key"} = $opt{$key};
            }
        }

        # Privprotocol is optional
        if (defined $opt{privprotocol}) {
            if ($opt{privprotocol} =~ m/$snmp_v3_privprotocol/xms) {
                $param{'-privprotocol'} = $opt{privprotocol};
            }
            else {
                print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
                  . "must be one of [des|aes|aes128|3des|3desde]\n";
                exit $E_UNKNOWN;
            }
        }

        # Authprotocol is optional
        if (defined $opt{authprotocol}) {
            if ($opt{authprotocol} =~ m/$snmp_v3_authprotocol/xms) {
                $param{'-authprotocol'} = $opt{authprotocol};
            }
            else {
                print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
                  . "must be one of [md5|sha]\n";
                exit $E_UNKNOWN;
            }
        }
    }
    # Parameters for SNMP v2c or v1
    elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
        $param{'-community'} = $opt{community};
    }
    else {
        print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
        exit $E_UNKNOWN;
    }

    # Try to initialize the SNMP session. Net::SNMP is loaded at run
    # time so that the module is only required in SNMP mode.
    if ( eval { require Net::SNMP; 1 } ) {
        ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
        if (!defined $snmp_session) {
            printf "SNMP: %s\n", $snmp_error;
            exit $E_UNKNOWN;
        }
    }
    else {
        print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
        exit $E_UNKNOWN;
    }
    return;
}
565
#
# Verify that SNMP works by requesting "chassisModelName", which all
# servers should have. Exits with status CRITICAL if the request gets
# no result, and with status UNKNOWN if the OID is reported as
# noSuchInstance or noSuchObject.
#
sub snmp_check {
    my $model_oid = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';  # chassisModelName
    my $response  = $snmp_session->get_request(-varbindlist => [$model_oid]);

    # Typically if remote host isn't responding
    if (!defined $response) {
        printf "SNMP CRITICAL: %s\n", $snmp_session->error;
        exit $E_CRITICAL;
    }

    # If OpenManage isn't installed or is not working
    my $model = $response->{$model_oid};
    if ($model =~ m{\A noSuch (Instance|Object) \z}xms) {
        print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
        exit $E_UNKNOWN;
    }

    return;
}
587
#
# Detecting blade via SNMP
#
# Sets the global $blade to 1 if the base board type reported via
# SNMP is '3'. In every other case $blade is left untouched.
#
sub snmp_detect_blade {
    my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
    my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);

    # Identify blade. Older models (4th and 5th gen models) and/or old
    # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
    # similar, or no result at all, we assume that this isn't a blade.
    # The defined() checks avoid an "uninitialized value" warning
    # when the request itself fails and $result is undef.
    if (defined $result
        and defined $result->{$DellBaseBoardType}
        and $result->{$DellBaseBoardType} eq '3') {
        $blade = 1;
    }
    return;
}
603
#
# Locate the omreport binary and store its path in the global
# $omreport. A path given with the --omreport option is used if it
# is executable; otherwise the first executable among a list of
# well-known locations is chosen. Exits with status UNKNOWN if no
# usable binary is found.
#
sub find_omreport {
    # A user-specified path takes precedence when it is executable
    if (defined $opt{omreport} and -x $opt{omreport}) {
        $omreport = $opt{omreport};
        return;
    }

    # Well-known full paths for omreport, in order of preference
    my @candidates
      = (
         '/usr/bin/omreport',                            # default on Linux
         '/opt/dell/srvadmin/oma/bin/omreport.sh',       # alternate on Linux
         '/opt/dell/srvadmin/oma/bin/omreport',          # alternate on Linux
         'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # default on Windows
         'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # default on Windows x64
        );

    # Pick the first candidate that we are allowed to execute
    my ($first_executable) = grep { -x $_ } @candidates;
    if (defined $first_executable) {
        $omreport = $first_executable;
    }

    # Exit with status=UNKNOWN if OM is not installed, or we don't
    # have permission to execute the binary
    if (!defined $omreport) {
        print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
        exit $E_UNKNOWN;
    }
    return;
}
641
#
# Inspect the output of 'omreport -?' to find out which arguments
# this omreport accepts. This accommodates the deprecated options
# "chassis" and "system" (on newer hardware) as well as blade servers:
#
#   servermodule     => used instead of "system"
#   mainsystem       => used instead of "chassis"
#   modularenclosure => the system is assumed to be a blade
#
sub check_omreport_options {
    my $help_lines = run_command("$omreport -? 2>&1");

    foreach my $line (@{ $help_lines }) {
        if ($line =~ m/\A servermodule /xms) {
            $omopt_system = 'servermodule';
            next;
        }
        if ($line =~ m/\A mainsystem /xms) {
            $omopt_chassis = 'mainsystem';
            next;
        }
        if ($line =~ m/\A modularenclosure /xms) {
            $blade = 1;
        }
    }
    return;
}
667
#
# Read the blacklist option and return a reference to a hash
# containing the blacklisted components. Keys are component names and
# values are references to arrays of component ids.
#
# Each --blacklist argument is either a blacklist string or the name
# of a file whose first line holds such a string. The string has the
# form "name=id,id/name=id,...". If a blacklist file can't be opened
# an UNKNOWN message is reported and an empty hash is returned.
#
sub get_blacklist {
    my @bl        = ();
    my %blacklist = ();

    foreach my $black (@{ $opt{blacklist} }) {
        my $tmp = $black;
        if (-f $black) {
            open my $BL, '<', $black
              or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN);
                      return {} };
            $tmp = <$BL>;
            close $BL;
            next if !defined $tmp;    # empty file, nothing to blacklist
            chomp $tmp;
        }
        push @bl, $tmp;
    }

    return {} if !@bl;

    # Parse blacklist string, put in hash
    foreach my $black (@bl) {
        foreach my $c (split m{/}xms, $black) {
            next if $c !~ m/=/xms;
            my ($key, $val) = split /=/xms, $c;
            # "name=" yields no value; store an empty id list without
            # feeding undef to split
            my @vals = defined $val ? split(/,/xms, $val) : ();
            $blacklist{$key} = \@vals;
        }
    }

    return \%blacklist;
}
709
#
# Read the --all, --only and --check options and adjust the hash
# %check, which is a rough list of components to be checked. Also
# turns off the global health check ($global) when --check has
# disabled a component that is part of the global status.
#
# Conflicting option combinations and unknown --only keywords make
# the plugin print an error and exit with status UNKNOWN.
#
sub adjust_checks {
    my @cl = ();

    # True if --only names an alert type rather than a component. The
    # alternation is grouped so that both anchors apply to both words.
    my $only_is_alert_type
      = (defined $opt{only}
         and $opt{only} =~ m{\A (?:critical|warning) \z}xms) ? 1 : 0;

    # Adjust checking based on the '--all' option
    if ($opt{all}) {
        # Check option usage
        if (defined $opt{only} and !$only_is_alert_type) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
            exit $E_UNKNOWN;
        }
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
            exit $E_UNKNOWN;
        }

        # set the check hash to check everything
        foreach my $component (keys %check) {
            $check{$component} = 1;
        }

        return;
    }

    # Adjust checking based on the '--only' option
    if (defined $opt{only} and !$only_is_alert_type) {
        # Check option usage
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
            exit $E_UNKNOWN;
        }
        if (! exists $check{$opt{only}} and $opt{only} ne 'chassis') {
            print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
            exit $E_UNKNOWN;
        }

        # reset the check hash
        foreach my $component (keys %check) {
            $check{$component} = 0;
        }

        # adjust the check hash
        if ($opt{only} eq 'chassis') {
            foreach my $component (qw(memory fans power temp cpu voltage
                                      batteries amperage intrusion esmhealth)) {
                $check{$component} = 1;
            }
        }
        else {
            $check{$opt{only}} = 1;
        }

        return;
    }

    # Adjust checking based on the '--check' option. Each argument is
    # either a check string or a file whose first line holds one.
    foreach my $check (@{ $opt{check} }) {
        my $tmp = $check;
        if (-f $check) {
            open my $CL, '<', $check
              or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN);
                      return };
            $tmp = <$CL>;
            close $CL;
            next if !defined $tmp;    # empty file
            # Without chomp the last value would be e.g. "0\n", which
            # is true in boolean context
            chomp $tmp;
        }
        push @cl, $tmp;
    }

    return if !@cl;

    # Parse checklist string, put in hash
    foreach my $check (@cl) {
        foreach my $c (split /,/xms, $check) {
            next if $c !~ m/=/xms;
            my ($key, $val) = split /=/xms, $c;
            # "name=" has no value; treat it as "off"
            $check{$key} = defined $val ? $val : 0;
        }
    }

    # Check if we should check global health status
    CHECK_KEY:
    foreach my $component (keys %check) {
        next CHECK_KEY if $component eq 'esmlog';   # not part of global status
        next CHECK_KEY if $component eq 'alertlog'; # not part of global status

        if ($check{$component} == 0) { # found something with checking turned off
            $global = 0;
            last CHECK_KEY;
        }
    }

    return;
}
805
#
# Run omreport with the given parameters and return a reference to an
# array of anonymous hashes, one per data row of the semicolon
# separated output. The hash keys are the column names taken from the
# most recent header row (a row whose first field is "Index", "ID" or
# "Severity").
# Takes one argument: string containing parameters to omreport
#
sub run_omreport {
    my $command = shift;
    my @records = ();    # parsed data rows
    my @columns = ();    # column names from the latest header row

    # Errors that are OK. Some low-end poweredge (and blades) models
    # don't have RAID controllers, intrusion detection sensor, or
    # redundant/instrumented power supplies etc.
    my $ok_errors
      = qr{
            Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem  # No intrusion probe
          | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
          | No\scontrollers\sfound                                     # No RAID controller
          | No\sbattery\sprobes\sfound\son\sthis\ssystem               # No battery probes
          | Invalid\scommand:\spwrmonitoring                           # Older OMSAs lack this command(?)
      }xms;

    # Errors that are OK on blade servers
    my $ok_blade_errors
      = qr{
            No\sfan\sprobes\sfound\son\sthis\ssystem # No fan probes
      }xms;

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
    if (!defined $rawtext) {
        return [];
    }

    # Workaround for Openmanage BUG introduced in OMSA 5.5.0
    if ($command eq 'storage controller') {
        $rawtext =~ s/\n;/;/gxms;
    }

    # Parse output, store in array
    LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        if ($line =~ m/\A Error/xms) {
            next LINE if $line =~ m{$ok_errors}xms;
            next LINE if ($blade and $line =~ m{$ok_blade_errors}xms);
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than 3 fields
        next LINE if $line !~ m/(.*?;){2}/xms;

        my @vals = split /;/xms, $line;
        if ($vals[0] =~ m/\A (Index|ID|Severity) \z/xms) {
            @columns = @vals;
            next LINE;
        }

        # Data row: pair each column name with the value in the same
        # position; missing values become undef
        my %record;
        @record{@columns} = @vals;
        push @records, \%record;
    }

    # Finally, return the collected information
    return \@records;
}
864
865
#
# Tell whether a component is blacklisted. Returns 1 if the id is
# listed for the named component in %blacklist, 0 otherwise (also
# when no id is given). Takes two arguments:
#   arg1: component name
#   arg2: component id or index
#
sub blacklisted {
    my ($name, $id) = @_;

    return 0 if !defined $blacklist{$name};
    return 0 if !defined $id;

    my $matches = grep { $_ eq $id } @{ $blacklist{$name} };
    return $matches ? 1 : 0;
}
887
# Convert a NexusID as delivered via SNMP to our own notation: a
# leading backslash is dropped and each remaining backslash becomes a
# colon.
sub convert_nexus {
    my ($raw) = @_;
    (my $nexus = $raw) =~ s{\A \\}{}xms;
    $nexus =~ tr{\\}{:};
    return $nexus;
}
895
# Sets custom temperature thresholds based on user supplied options.
#
# Takes one argument: the type of threshold, 'w' for the --warning
# option; anything else selects the --critical option. Each option
# argument is either a threshold string or the name of a file whose
# first line holds one. The string has the form "key=max" or
# "key=max/min", with several entries separated by commas.
#
# Returns a reference to a hash of the form
#   { key => { max => ..., min => ... } }
# where "min" is only present when given. If a threshold file can't
# be opened an UNKNOWN message is reported and an empty hash is
# returned.
sub custom_temperature_thresholds {
    my $type   = shift;  # type of threshold, either w (warning) or c (critical)
    my %thres  = ();     # will contain the thresholds
    my @limits = ();     # holds the input

    my @opt = $type eq 'w' ? @{ $opt{warning} } : @{ $opt{critical} };

    foreach my $t (@opt) {
        my $tmp = $t;
        if (-f $t) {
            open my $F, '<', $t
              or do { report('other', "Couldn't open temperature threshold file $t: $!",
                             $E_UNKNOWN);
                      return {} };
            $tmp = <$F>;
            close $F;
            next if !defined $tmp;    # empty file
            # Strip the line ending so it doesn't end up in the last value
            chomp $tmp;
        }
        push @limits, $tmp;
    }

    # Parse threshold string, put in hash
    foreach my $th (@limits) {
        foreach my $t (split m{,}xms, $th) {
            next if $t !~ m{=}xms;
            my ($key, $val) = split m{=}xms, $t;
            # $val is undef for "key="; only match when it is defined
            if (defined $val and $val =~ m{/}xms) {
                my ($max, $min) = split m{/}xms, $val;
                $thres{$key}{max} = $max;
                $thres{$key}{min} = $min;
            }
            else {
                $thres{$key}{max} = $val;
            }
        }
    }

    return \%thres;
}
940
941
# Arrange an SNMP result according to the OIDs checked.
#
# $result maps full OIDs to values and $oidref maps OID prefixes to
# field names. For every OID in $result the last number is taken as a
# 1-based row index and the rest as the prefix; if the prefix is known
# in $oidref, the value is stored under its field name in the hash for
# that row. Returns a reference to the array of row hashes.
sub get_snmp_output {
    my ($result, $oidref) = @_;
    my @rows = ();

    foreach my $oid (keys %{ $result }) {
        my @parts = split /\./xms, $oid;
        my $index = pop @parts;
        $index--;                        # rows are numbered from 1
        my $prefix = join q{.}, @parts;

        next if !exists $oidref->{$prefix};
        my $field = $oidref->{$prefix};
        $rows[$index]{$field} = $result->{$oid};
    }
    return \@rows;
}
958
959
# Map the controller or other item in-place: every hash in the list
# that doesn't already have $key gets $key set to $val. Hashes that
# have the key are left as they are. Returns nothing.
sub map_item {
    my ($key, $val, $list) = @_;

    my @missing = grep { !exists $_->{$key} } @{ $list };
    foreach my $item (@missing) {
        $item->{$key} = $val;
    }
    return;
}
971
# Build the URL for the official Dell documentation for a specific
# PowerEdge server, given the model name (e.g. "PowerEdge R710").
sub documentation_url {
    my $model = shift;

    # reduce "PowerEdge <name>" to the lowercased name, e.g. "r710"
    if ($model =~ m{\A PowerEdge \s (.+?) \z}xms) {
        $model = lc $1;
    }

    # blades (e.g. M600, M710) have common documentation, so their
    # short form is just "m"
    if ($model =~ m{\A m\d+ \z}xms) {
        $model = 'm';
    }

    return join q{}, 'http://support.dell.com/support/edocs/systems/pe', $model, '/';
}
986
# Build the URL for warranty information for a server with a given
# serial number (servicetag). The value of the htmlinfo option is used
# as a country code to select a regional Dell support site; when the
# code isn't in the table the global default site is used.
sub warranty_url {
    my $tag = shift;

    # Dell support sites for different parts of the world
    my %site
      = (
         'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
         'ap'   => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
         'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
        );

    # warranty URL prefix for each country code
    my %prefix_for
      = (
         # EMEA
         'at' => $site{emea} . 'de/details?c=at&l=de&ServiceTag=',  # Austria
         'be' => $site{emea} . 'nl/details?c=be&l=nl&ServiceTag=',  # Belgium
         'cz' => $site{emea} . 'cs/details?c=cz&l=cs&ServiceTag=',  # Czech Republic
         'de' => $site{emea} . 'de/details?c=de&l=de&ServiceTag=',  # Germany
         'dk' => $site{emea} . 'da/details?c=dk&l=da&ServiceTag=',  # Denmark
         'es' => $site{emea} . 'es/details?c=es&l=es&ServiceTag=',  # Spain
         'fi' => $site{emea} . 'fi/details?c=fi&l=fi&ServiceTag=',  # Finland
         'fr' => $site{emea} . 'fr/details?c=fr&l=fr&ServiceTag=',  # France
         'gr' => $site{emea} . 'en/details?c=gr&l=el&ServiceTag=',  # Greece
         'it' => $site{emea} . 'it/details?c=it&l=it&ServiceTag=',  # Italy
         'il' => $site{emea} . 'en/details?c=il&l=en&ServiceTag=',  # Israel
         'me' => $site{emea} . 'en/details?c=me&l=en&ServiceTag=',  # Middle East
         'no' => $site{emea} . 'no/details?c=no&l=no&ServiceTag=',  # Norway
         'nl' => $site{emea} . 'nl/details?c=nl&l=nl&ServiceTag=',  # The Netherlands
         'pl' => $site{emea} . 'pl/details?c=pl&l=pl&ServiceTag=',  # Poland
         'pt' => $site{emea} . 'en/details?c=pt&l=pt&ServiceTag=',  # Portugal
         'ru' => $site{emea} . 'ru/details?c=ru&l=ru&ServiceTag=',  # Russia
         'se' => $site{emea} . 'sv/details?c=se&l=sv&ServiceTag=',  # Sweden
         'uk' => $site{emea} . 'en/details?c=uk&l=en&ServiceTag=',  # United Kingdom
         'za' => $site{emea} . 'en/details?c=za&l=en&ServiceTag=',  # South Africa
         # America
         'br' => $site{glob} . 'c=br&l=pt&ServiceTag=',  # Brazil
         'ca' => $site{glob} . 'c=ca&l=en&ServiceTag=',  # Canada
         'mx' => $site{glob} . 'c=mx&l=es&ServiceTag=',  # Mexico
         'us' => $site{glob} . 'c=us&l=en&ServiceTag=',  # USA
         # Asia/Pacific
         'au' => $site{ap} . 'c=au&l=en&ServiceTag=',  # Australia
         'cn' => $site{ap} . 'c=cn&l=zh&ServiceTag=',  # China
         'in' => $site{ap} . 'c=in&l=en&ServiceTag=',  # India
         # default fallback
         'XX' => $site{glob} . 'ServiceTag=',  # default
        );

    # Fall back to the default entry for unknown country codes
    my $country = exists $prefix_for{$opt{htmlinfo}} ? $opt{htmlinfo} : 'XX';

    return $prefix_for{$country} . $tag;
}
1044
1045
1046
1047#---------------------------------------------------------------------
1048# Check functions
1049#---------------------------------------------------------------------
1050
#-----------------------------------------
# Check global health status
#
# Returns the Nagios level that corresponds to the overall health
# status. With SNMP the global system status (storage and chassis) is
# queried; if that request gives no result the plugin prints the
# session's error message and exits with status UNKNOWN. Without SNMP
# the "Main System" line in the omreport system summary is used, which
# covers the chassis only. The default is OK.
#-----------------------------------------
sub check_global {
    my $health = $E_OK;

    if ($snmp) {
        #
        # Checks global status, i.e. both storage and chassis
        #
        my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
        if (!defined $result) {
            # The reason for a failed request is kept by the session
            # object. $snmp_error only holds the error from creating
            # the session and is empty once the session exists.
            printf "SNMP ERROR [global]: %s\n", $snmp_session->error;
            exit $E_UNKNOWN;
        }
        $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
    }
    else {
        #
        # NB! This does not check storage, only chassis...
        #
        foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
            next if !m/;/xms;
            next if m/\A SEVERITY;COMPONENT/xms;
            if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
                $health = $status2nagios{$1};
                last;
            }
        }
    }

    # Return the status
    return $health;
}
1086
1087
1088#-----------------------------------------
1089# STORAGE: Check controllers
1090#-----------------------------------------
sub check_controllers {
    # Checks all storage controllers and reports each one via report().
    #
    # Controller data comes from the SNMP controller table or from
    # "omreport storage controller", depending on $snmp. For every
    # controller the ID is pushed onto @controllers and some details
    # are stored in %sysinfo, even when the controller is blacklisted.
    #
    # A defined minimum firmware/driver version is reported as a
    # warning ("out of date"). A controller whose status is
    # 'Non-Critical' while such a minimum is defined is otherwise
    # reported as OK.
    my @ctrl_entries = ();

    if (!$snmp) {
        @ctrl_entries = @{ run_omreport('storage controller') };
    }
    else {
        my %ctrl_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.1'  => 'controllerNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.2'  => 'controllerName',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.5'  => 'controllerState',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.8'  => 'controllerFWVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
            );

        # get_table() is used for the odd case where a server has two
        # or more controllers and some OIDs are missing on one of them.
        my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
        my $table = $snmp_session->get_table(-baseoid => $controllerTable);

        # Having no controllers at all is fine
        return if !defined $table;

        @ctrl_entries = @{ get_snmp_output($table, \%ctrl_oid) };
    }

    # Textual names for the numeric SNMP controller states
    my %state_name
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

  CTRL:
    foreach my $entry (@ctrl_entries) {
        my ($id, $nexus, $name, $state, $status);
        my ($minfw, $mindr, $firmware, $driver);

        if ($snmp) {
            $id     = $entry->{'controllerNumber'} - 1;
            $name   = $entry->{'controllerName'};
            $state  = $state_name{$entry->{'controllerState'}};
            $status = $snmp_status{$entry->{'controllerComponentStatus'}};

            # Minimum versions stay undefined when the OID is missing
            if (exists $entry->{'controllerMinFWVersion'}) {
                $minfw = $entry->{'controllerMinFWVersion'};
            }
            if (exists $entry->{'controllerMinDriverVersion'}) {
                $mindr = $entry->{'controllerMinDriverVersion'};
            }

            # Current versions fall back to 'N/A' when the OID is missing
            $firmware = 'N/A';
            if (exists $entry->{controllerFWVersion}) {
                $firmware = $entry->{controllerFWVersion};
            }
            $driver = 'N/A';
            if (exists $entry->{controllerDriverVersion}) {
                $driver = $entry->{controllerDriverVersion};
            }

            $nexus = convert_nexus($entry->{controllerNexusID});
        }
        else {
            $id     = $entry->{ID};
            $name   = $entry->{Name};
            $state  = $entry->{State};
            $status = $entry->{Status};

            # 'Not Applicable' means no minimum version is required
            if ($entry->{'Minimum Required Firmware Version'} ne 'Not Applicable') {
                $minfw = $entry->{'Minimum Required Firmware Version'};
            }
            if ($entry->{'Minimum Required Driver Version'} ne 'Not Applicable') {
                $mindr = $entry->{'Minimum Required Driver Version'};
            }

            if ($entry->{'Firmware Version'} ne 'Not Applicable') {
                $firmware = $entry->{'Firmware Version'};
            }
            else {
                $firmware = 'N/A';
            }
            if ($entry->{'Driver Version'} ne 'Not Applicable') {
                $driver = $entry->{'Driver Version'};
            }
            else {
                $driver = 'N/A';
            }

            $nexus = $id;
        }

        $name =~ s{\s+\z}{}xms; # remove trailing whitespace
        push @controllers, $id;

        # Keep some controller details for later use
        my $info = $sysinfo{'controller'}{$id} ||= {};
        $info->{'id'}       = $nexus;
        $info->{'name'}     = $name;
        $info->{'driver'}   = $driver;
        $info->{'firmware'} = $firmware;

        next CTRL if blacklisted('ctrl', $nexus);

        # Special case: firmware older than the required minimum
        if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
            chomp $firmware;
            report('storage',
                   sprintf(q{Controller %d [%s]: Firmware '%s' is out of date},
                           $id, $name, $firmware),
                   $E_WARNING, $nexus);
        }

        # Special case: driver older than the required minimum
        if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
            chomp $driver;
            report('storage',
                   sprintf(q{Controller %d [%s]: Driver '%s' is out of date},
                           $id, $name, $driver),
                   $E_WARNING, $nexus);
        }

        # A 'Non-Critical' status while a minimum version is defined is
        # treated as healthy here; the warnings above already cover it.
        my $outdated = (defined $minfw || defined $mindr);
        my $healthy  = ($status eq 'Ok'
                        || ($status eq 'Non-Critical' && $outdated));

        my ($format, $level)
          = $healthy
          ? ('Controller %d [%s] is %s', $E_OK)
          : ('Controller %d [%s] needs attention: %s', $status2nagios{$status});

        report('storage', sprintf($format, $id, $name, $state), $level, $nexus);
    }
    return;
}
1216
1217
1218#-----------------------------------------
1219# STORAGE: Check physical drives
1220#-----------------------------------------
sub check_physical_disks {
    # Checks all physical disks and reports each one via report().
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Disk data comes from SNMP or from "omreport storage pdisk" for
    # each known controller, depending on $snmp. A failing SNMP query
    # prints an error and exits with $E_UNKNOWN.
    #
    # Disks with a predicted failure or in state 'Rebuilding' are
    # reported as warnings; other non-'Ok' disks are reported with the
    # Nagios level that %status2nagios gives for their status.
    return if $#controllers == -1;

    my $id       = undef;
    my $nexus    = undef;
    my $name     = undef;
    my $state    = undef;
    my $status   = undef;
    my $fpred    = undef;
    my $progr    = undef;
    my $ctrl     = undef;
    my $vendor   = undef; # disk vendor
    my $product  = undef; # product ID
    my $capacity = undef; # disk length (size) in bytes
    my @output   = ();

    if ($snmp) {
        my %pdisk_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.1'  => 'arrayDiskNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.2'  => 'arrayDiskName',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.3'  => 'arrayDiskVendor',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.4'  => 'arrayDiskState',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.6'  => 'arrayDiskProductID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.9'  => 'arrayDiskEnclosureID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
             '1.3.6.1.4.1.674.10893.1.20.130.5.1.5'  => 'arrayDiskEnclosureConnectionEnclosureNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.5.1.7'  => 'arrayDiskEnclosureConnectionControllerNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%pdisk_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage pdisk controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Textual names for the numeric SNMP disk states
    my %pdisk_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         7  => 'Recovering',
         11 => 'Removed',
         15 => 'Resynching',
         24 => 'Rebuilding',
         25 => 'No Media',
         26 => 'Formatting',
         28 => 'Diagnostics',
         34 => 'Predictive failure',
         35 => 'Initializing',
         39 => 'Foreign',
         40 => 'Clear',
         41 => 'Unsupported',
         53 => 'Incompatible',
        );

    # Check physical disks on each of the controllers
  PDISK:
    foreach my $out (@output) {
        if ($snmp) {
            $name = $out->{arrayDiskName};
            # A disk name ending in three colon-separated numbers gets a
            # three-part ID (channel:enclosure:target), otherwise a
            # two-part ID (channel:target).
            if ($name =~ m{.*\d+:\d+:\d+\z}xms) {
                $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID},
                                  $out->{arrayDiskTargetID});
            }
            else {
                $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID});
            }
            $state    = $pdisk_state{$out->{arrayDiskState}};
            $status   = $snmp_status{$out->{arrayDiskComponentStatus}};
            $fpred    = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
            $progr    = q{};
            $ctrl     = exists $out->{arrayDiskEnclosureConnectionControllerNumber}
              ? $out->{arrayDiskEnclosureConnectionControllerNumber} - 1
                : -1;
            $nexus    = convert_nexus($out->{arrayDiskNexusID});
            $vendor   = $out->{arrayDiskVendor};
            $product  = $out->{arrayDiskProductID};
            $capacity = $out->{arrayDiskLengthInMB} * 1024**2;
        }
        else {
            $id       = $out->{'ID'};
            $name     = $out->{'Name'};
            $state    = $out->{'State'};
            $status   = $out->{'Status'};
            $fpred    = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
            $progr    = ' [' . $out->{'Progress'} . ']';
            $ctrl     = $out->{'ctrl'};
            $nexus    = join q{:}, $out->{ctrl}, $id;
            $vendor   = $out->{'Vendor ID'};
            $product  = $out->{'Product ID'};
            $capacity = $out->{'Capacity'};
            # Keep only the byte count from the "(<n> bytes)" suffix
            $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
        }

        next PDISK if blacklisted('pdisk', $nexus);
        $count{pdisk}++;

        $vendor  =~ s{\s+\z}{}xms; # remove trailing whitespace
        $product =~ s{\s+\z}{}xms; # remove trailing whitespace

        # Calculate human readable capacity (decimal units)
        $capacity = ceil($capacity / 1000**3) >= 1000
          ? sprintf '%.1fTB', ($capacity / 1000**4)
            : sprintf '%.0fGB', ($capacity / 1000**3);
        $capacity = '450GB' if $capacity eq '449GB';  # quick fix for 450GB disks
        $capacity = '146GB' if $capacity eq '147GB';  # quick fix for 146GB disks
        $capacity = '300GB' if $capacity eq '299GB';  # quick fix for 300GB disks

        # Capitalize only the first letter of the vendor name. ucfirst
        # really upper-cases the first letter and, unlike substr with a
        # start offset of 1, does not warn on an empty vendor string.
        $vendor = ucfirst lc $vendor;

        # Remove unnecessary trademark rubbish from vendor name
        $vendor =~ s{\(tm\)\z}{}xms;

        # Special case: Failure predicted
        if ($status eq 'Non-Critical' and $fpred) {
            my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
              $name, $vendor, $product, $capacity, $ctrl;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Rebuilding
        elsif ($state eq 'Rebuilding') {
            my $msg = sprintf '%s [%s] on ctrl %d is %s%s',
              $name, $capacity, $ctrl, $state, $progr;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Default
        elsif ($status ne 'Ok') {
            my $msg =  sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
              $name, $vendor, $product, $capacity, $ctrl, $state;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf '%s [%s] on ctrl %d is %s',
              $name, $capacity, $ctrl, $state;
            report('storage', $msg, $E_OK, $nexus);
        }
    }
    return;
}
1382
1383
1384#-----------------------------------------
1385# STORAGE: Check logical drives
1386#-----------------------------------------
sub check_virtual_disks {
    # Checks all virtual disks (logical drives) and reports each one
    # via report().
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Data comes from SNMP or from "omreport storage vdisk" for each
    # known controller, depending on $snmp. An undefined SNMP result is
    # taken to mean "no logical drives" and is not an error.
    #
    # A drive in state 'Regenerating' is reported as a warning; other
    # non-'Ok' drives are reported with the Nagios level that
    # %status2nagios gives for their status.
    return if $#controllers == -1;

    my @vdisks = ();

    if (!$snmp) {
        foreach my $c (@controllers) {
            push @vdisks, @{ run_omreport("storage vdisk controller=$c") };
            map_item('ctrl', $c, \@vdisks);
        }
    }
    else {
        my %vdisk_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.3'  => 'virtualDiskDeviceName',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.4'  => 'virtualDiskState',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.6'  => 'virtualDiskLengthInMB',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);

        # Having no logical drives is fine
        return if !defined $entries;

        @vdisks = @{ get_snmp_output($entries, \%vdisk_oid) };
    }

    # Textual names for the numeric SNMP virtual disk states
    my %state_name
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         15 => 'Resynching',
         16 => 'Regenerating',
         24 => 'Rebuilding',
         26 => 'Formatting',
         32 => 'Reconstructing',
         35 => 'Initializing',
         36 => 'Background Initialization',
         38 => 'Resynching Paused',
         52 => 'Permanently Degraded',
         54 => 'Degraded Redundancy',
        );

    # Textual names for the numeric SNMP RAID layouts
    my %layout_name
      = (
         1  => 'Concatenated',
         2  => 'RAID-0',
         3  => 'RAID-1',
         7  => 'RAID-5',
         8  => 'RAID-6',
         10 => 'RAID-10',
         12 => 'RAID-50',
         19 => 'Concatenated RAID 1',
         24 => 'RAID-60',
        );

    # Go through the virtual disks of all controllers
  VDISK:
    foreach my $vd (@vdisks) {
        my ($id, $dev, $nexus, $state, $status, $layout, $size, $progr, $ctrl);

        if ($snmp) {
            $id     = $vd->{virtualDiskTargetID};
            $dev    = $vd->{virtualDiskDeviceName};
            $state  = $state_name{$vd->{virtualDiskState}};
            $status = $snmp_status{$vd->{virtualDiskComponentStatus}};
            $layout = $layout_name{$vd->{virtualDiskLayout}};
            $size   = sprintf '%.2f GB', $vd->{virtualDiskLengthInMB} / 1024;
            $progr  = q{};  # progress is not fetched via SNMP
            $nexus  = convert_nexus($vd->{virtualDiskNexusID});

            # The controller ID is the first part of the nexus ID
            $ctrl = $nexus;
            $ctrl =~ s{\A (\d+):\d+ \z}{$1}xms;
        }
        else {
            $id     = $vd->{ID};
            $dev    = $vd->{'Device Name'};
            $state  = $vd->{State};
            $status = $vd->{Status};
            $layout = $vd->{Layout};
            $progr  = ' [' . $vd->{Progress} . ']';
            $ctrl   = $vd->{ctrl};
            $nexus  = join q{:}, $vd->{ctrl}, $id;

            # Cut the size string after the "GB" part
            $size = $vd->{Size};
            $size =~ s{\A (.*GB).* \z}{$1}xms;
        }

        next VDISK if blacklisted('vdisk', $nexus);
        $count{vdisk}++;

        # Select message format, Nagios level and any extra arguments
        my ($format, $level);
        my @extra = ();

        if ($state eq 'Regenerating') {
            # Special case: Regenerating (includes progress)
            $format = q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s};
            $level  = $E_WARNING;
            @extra  = ($progr);
        }
        elsif ($status ne 'Ok') {
            # Default
            $format = q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s};
            $level  = $status2nagios{$status};
        }
        else {
            # Ok
            $format = q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s};
            $level  = $E_OK;
        }

        my $msg = sprintf $format, $id, $dev, $layout, $size, $ctrl, $state, @extra;
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1512
1513
1514#-----------------------------------------
1515# STORAGE: Check cache batteries
1516#-----------------------------------------
sub check_cache_battery {
    # Checks all controller cache batteries and reports each one via
    # report().
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Data comes from SNMP or from "omreport storage battery" for each
    # known controller, depending on $snmp. An undefined SNMP result is
    # taken to mean "no cache battery" and is not an error.
    #
    # The states 'Charging', 'Learning' and 'Power Low' are reported as
    # warnings marked "probably harmless". Other non-'Ok' batteries are
    # reported with the Nagios level that %status2nagios gives for
    # their status.
    return if $#controllers == -1;

    my $id     = undef;
    my $nexus  = undef;
    my $state  = undef;
    my $status = undef;
    my $ctrl   = undef;
    my $learn  = undef; # learn state
    my $pred   = undef; # battery's ability to be charged
    my @output = ();

    if ($snmp) {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.4'  => 'batteryState',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.6'  => 'batteryComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.9'  => 'batteryNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
             '1.3.6.1.4.1.674.10893.1.20.130.16.1.5'  => 'batteryConnectionControllerNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No cache battery is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%bat_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage battery controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Textual names for the numeric SNMP battery states
    my %bat_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         6  => 'Degraded',
         7  => 'Reconditioning',
         9  => 'High',
         10 => 'Power Low',
         12 => 'Charging',
         21 => 'Missing',
         36 => 'Learning',
        );

    # Textual names for the numeric SNMP learn states
    my %bat_learn_state
      = (
         1  => 'Failed',
         2  => 'Active',
         4  => 'Timed out',
         8  => 'Requested',
         16 => 'Idle',
        );

    # Textual names for the numeric SNMP predicted capacity values
    my %bat_pred_cap
      = (
         1 => 'Failed',  # The battery cannot be charged and needs to be replaced
         2 => 'Ready',   # The battery can be charged to full capacity
         4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
                         # battery cannot be determined until the Learn cycle is complete
        );

    # Check battery on each of the controllers
  BATTERY:
    foreach my $out (@output) {
        if ($snmp) {
            $state  = $bat_state{$out->{batteryState}};
            $status = $snmp_status{$out->{batteryComponentStatus}};
            $learn  = exists $out->{batteryLearnState}
              ? $bat_learn_state{$out->{batteryLearnState}} : undef;
            $pred   = exists $out->{batteryPredictedCapacity}
              ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
            $ctrl   = $out->{batteryConnectionControllerNumber} - 1;
            $nexus  = convert_nexus($out->{batteryNexusID});
            # The battery ID is the second part of the nexus ID
            $id     = $nexus;
            $id     =~ s{\A \d+:(\d+) \z}{$1}xms;
        }
        else {
            $id     = $out->{'ID'};
            $state  = $out->{'State'};
            $status = $out->{'Status'};
            $learn  = $out->{'Learn State'};
            $pred   = $out->{'Predicted Capacity Status'};
            $ctrl   = $out->{'ctrl'};
            $nexus  = join q{:}, $out->{ctrl}, $id;
        }

        # The learn state and predicted capacity are only used in the
        # messages below. If they are missing, use a readable
        # placeholder instead of passing undef to sprintf, which would
        # warn and print an empty "()".
        $learn = 'Unknown' if !defined $learn;
        $pred  = 'Unknown' if !defined $pred;

        next BATTERY if blacklisted('bat', $nexus);

        # Special case: Charging
        if ($state eq 'Charging') {
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $pred;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Learning (battery learns its capacity)
        elsif ($state eq 'Learning') {
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $learn;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Power Low (first part of recharge cycle)
        elsif ($state eq 'Power Low') {
            my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
              $id, $ctrl, $state;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Default
        elsif ($status ne 'Ok') {
            my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
              $id, $ctrl, $state, $status;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf 'Cache battery %d in controller %d is %s',
              $id, $ctrl, $state;
            report('storage', $msg, $E_OK, $nexus);
        }
    }
    return;
}
1644
1645
1646#-----------------------------------------
1647# STORAGE: Check connectors (channels)
1648#-----------------------------------------
sub check_connectors {
    # Checks all storage connectors (channels) and reports each one via
    # report(), using the Nagios level that %status2nagios gives for
    # the connector status.
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Data comes from SNMP or from "omreport storage connector" for
    # each known controller, depending on $snmp. A failing SNMP query
    # prints an error and exits with $E_UNKNOWN.
    return if $#controllers == -1;

    my @channels = ();

    if (!$snmp) {
        foreach my $c (@controllers) {
            push @channels, @{ run_omreport("storage connector controller=$c") };
            map_item('ctrl', $c, \@channels);
        }
    }
    else {
        my %conn_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.1'  => 'channelNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.2'  => 'channelName',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.3'  => 'channelState',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.8'  => 'channelComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.9'  => 'channelNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %conn_oid]);

        if (!defined $entries) {
            printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @channels = @{ get_snmp_output($entries, \%conn_oid) };
    }

    # Textual names for the numeric SNMP connector states
    my %state_name
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

    # Textual names for the numeric SNMP bus types
    my %bustype_name
      = (
         1 => 'SCSI',
         2 => 'IDE',
         3 => 'Fibre Channel',
         4 => 'SSA',
         6 => 'USB',
         7 => 'SATA',
         8 => 'SAS',
        );

    # Go through the connectors of all controllers
  CHANNEL:
    foreach my $chan (@channels) {
        my ($id, $nexus, $name, $state, $status, $type, $ctrl);

        if ($snmp) {
            $id     = $chan->{channelNumber} - 1;
            $name   = $chan->{channelName};
            $state  = $state_name{$chan->{channelState}};
            $status = $snmp_status{$chan->{channelComponentStatus}};
            $type   = $bustype_name{$chan->{channelBusType}};
            $nexus  = convert_nexus($chan->{channelNexusID});

            # The controller ID is the first part of the nexus ID
            $ctrl = $nexus;
            $ctrl =~ s{(\d+):\d+}{$1}xms;
        }
        else {
            $id     = $chan->{'ID'};
            $name   = $chan->{'Name'};
            $state  = $chan->{'State'};
            $status = $chan->{'Status'};
            $type   = $chan->{'Connector Type'};
            $ctrl   = $chan->{ctrl};
            $nexus  = join q{:}, $chan->{ctrl}, $id;
        }

        next CHANNEL if blacklisted('conn', $nexus);

        report('storage',
               sprintf('%s [%s] on controller %d is %s',
                       $name, $type, $ctrl, $state),
               $status2nagios{$status}, $nexus);
    }
    return;
}
1740
1741
1742#-----------------------------------------
1743# STORAGE: Check enclosures
1744#-----------------------------------------
sub check_enclosures {
    # Checks all storage enclosures and reports each one via report(),
    # using the Nagios level that %status2nagios gives for the
    # enclosure status.
    #
    # Data comes from SNMP or from "omreport storage enclosure" for
    # each known controller, depending on $snmp. An undefined SNMP
    # result is taken to mean "no enclosures" and is not an error.
    #
    # For every enclosure, including blacklisted ones, a record with
    # its id, controller and name is pushed onto @enclosures and some
    # details are stored in %sysinfo.
    my $id       = undef;
    my $nexus    = undef;
    my $name     = undef;
    my $state    = undef;
    my $status   = undef;
    my $firmware = undef;
    my $ctrl     = undef;
    my @output   = ();

    if ($snmp) {
        my %encl_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.1'  => 'enclosureNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.2'  => 'enclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.4'  => 'enclosureState',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);

        # No enclosures is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%encl_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage enclosure controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Textual names for the numeric SNMP enclosure states
    my %encl_state
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

  ENCLOSURE:
    foreach my $out (@output) {
        if ($snmp) {
            $id       = $out->{'enclosureNumber'} - 1;
            $name     = $out->{'enclosureName'};
            $state    = $encl_state{$out->{'enclosureState'}};
            $status   = $snmp_status{$out->{'enclosureComponentStatus'}};
            $firmware = exists $out->{enclosureFirmwareVersion}
              ? $out->{enclosureFirmwareVersion} : 'N/A';
            $nexus    = convert_nexus($out->{enclosureNexusID});
            # The controller ID is the first part of the nexus ID
            $ctrl     = $nexus;
            $ctrl     =~ s{\A (\d+):.* \z}{$1}xms;
        }
        else {
            $id       = $out->{ID};
            $name     = $out->{Name};
            $state    = $out->{State};
            $status   = $out->{Status};
            $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
              ? $out->{'Firmware Version'} : 'N/A';
            $nexus    = join q{:}, $out->{ctrl}, $id;
            $ctrl     = $out->{ctrl};
        }

        $name     =~ s{\s+\z}{}xms; # remove trailing whitespace
        $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace

        # Store enclosure data for future use. $ctrl is used rather
        # than $out->{ctrl}, because the SNMP entries have no 'ctrl'
        # key; in omreport mode the two are the same value.
        push @enclosures, { 'id'   => $id,
                            'ctrl' => $ctrl,
                            'name' => $name };

        # Collecting some storage info
        $sysinfo{'enclosure'}{$nexus}{'id'}       = $nexus;
        $sysinfo{'enclosure'}{$nexus}{'name'}     = $name;
        $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;

        next ENCLOSURE if blacklisted('encl', $nexus);

        my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
          $nexus, $name, $ctrl, $state;
        report('storage', $msg, $status2nagios{$status}, $nexus);
    }
    return;
}
1835
1836
1837#-----------------------------------------
1838# STORAGE: Check enclosure fans
1839#-----------------------------------------
sub check_enclosure_fans {
    # Checks all enclosure fans and reports each one via report().
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Data comes from SNMP or, for each enclosure in @enclosures, from
    # "omreport storage enclosure ... info=fans", depending on $snmp.
    # An undefined SNMP result is taken to mean "no enclosure fans" and
    # is not an error.
    return if $#controllers == -1;

    my @fans = ();

    if (!$snmp) {
        foreach my $enc (@enclosures) {
            my $fan_entries
              = run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans");
            push @fans, @{ $fan_entries };

            # Tag the entries with the enclosure they were fetched for
            map_item('ctrl',      $enc->{ctrl}, \@fans);
            map_item('encl_id',   $enc->{id},   \@fans);
            map_item('encl_name', $enc->{name}, \@fans);
        }
    }
    else {
        my %fan_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.1'  => 'fanNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.2'  => 'fanName',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.4'  => 'fanState',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.8.1.4'  => 'fanConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.8.1.5'  => 'fanConnectionEnclosureNumber',
            );

        my $entries = $snmp_session->get_entries(-columns => [keys %fan_oid]);

        # Having no enclosure fans is fine
        return if !defined $entries;

        @fans = @{ get_snmp_output($entries, \%fan_oid) };
    }

    # Textual names for the numeric SNMP fan states
    my %state_name
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         21 => 'Missing',
        );

    # Go through the fans of all enclosures
  FAN:
    foreach my $fan (@fans) {
        my ($id, $nexus, $name, $state, $status, $speed, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $fan->{fanNumber} - 1;
            $name      = $fan->{fanName};
            $state     = $state_name{$fan->{fanState}};
            $status    = $snmp_status{$fan->{fanComponentStatus}};
            $speed     = $fan->{fanProbeCurrValue};
            $encl_id   = $fan->{fanConnectionEnclosureNumber} - 1;
            $encl_name = $fan->{fanConnectionEnclosureName};
            $nexus     = convert_nexus($fan->{fanNexusID});
        }
        else {
            $id        = $fan->{'ID'};
            $name      = $fan->{'Name'};
            $state     = $fan->{'State'};
            $status    = $fan->{'Status'};
            $speed     = $fan->{'Speed'};
            $encl_id   = join q{:}, $fan->{ctrl}, $fan->{'encl_id'};
            $encl_name = $fan->{encl_name};
            $nexus     = join q{:}, $fan->{ctrl}, $fan->{'encl_id'}, $id;
        }

        next FAN if blacklisted('encl_fan', $nexus);

        if ($status eq 'Ok') {
            # Ok: include the fan speed in the message
            report('storage',
                   sprintf('%s in enclosure %s [%s] is %s (speed=%s)',
                           $name, $encl_id, $encl_name, $state, $speed),
                   $E_OK, $nexus);
        }
        else {
            # Default: anything that is not 'Ok' needs attention
            report('storage',
                   sprintf('%s in enclosure %s [%s] needs attention: %s',
                           $name, $encl_id, $encl_name, $state),
                   $status2nagios{$status}, $nexus);
        }
    }
    return;
}
1934
1935
1936#-----------------------------------------
1937# STORAGE: Check enclosure power supplies
1938#-----------------------------------------
sub check_enclosure_pwr {
    # Checks all enclosure power supplies and reports each one via
    # report().
    #
    # Does nothing if no controllers were found (@controllers empty).
    # Data comes from SNMP or, for each enclosure in @enclosures, from
    # "omreport storage enclosure ... info=pwrsupplies", depending on
    # $snmp. An undefined SNMP result is taken to mean "no enclosure
    # power supplies" and is not an error.
    return if $#controllers == -1;

    my @supplies = ();

    if (!$snmp) {
        foreach my $enc (@enclosures) {
            my $ps_entries
              = run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies");
            push @supplies, @{ $ps_entries };

            # Tag the entries with the enclosure they were fetched for
            map_item('ctrl',      $enc->{ctrl}, \@supplies);
            map_item('encl_id',   $enc->{id},   \@supplies);
            map_item('encl_name', $enc->{name}, \@supplies);
        }
    }
    else {
        my %ps_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.1'  => 'powerSupplyNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.2'  => 'powerSupplyName',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.4'  => 'powerSupplyState',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.9'  => 'powerSupplyComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # Having no enclosure power supplies is fine
        return if !defined $entries;

        @supplies = @{ get_snmp_output($entries, \%ps_oid) };
    }

    # Textual names for the numeric SNMP power supply states
    my %state_name
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         5  => 'Not Installed',
         6  => 'Degraded',
         11 => 'Removed',
         21 => 'Missing',
        );

    # Go through the power supplies of all enclosures
  PS:
    foreach my $ps (@supplies) {
        my ($id, $nexus, $name, $state, $status, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $ps->{powerSupplyNumber};
            $name      = $ps->{powerSupplyName};
            $state     = $state_name{$ps->{powerSupplyState}};
            $status    = $snmp_status{$ps->{powerSupplyComponentStatus}};
            $encl_id   = $ps->{powerSupplyConnectionEnclosureNumber} - 1;
            $encl_name = $ps->{powerSupplyConnectionEnclosureName};
            $nexus     = convert_nexus($ps->{powerSupplyNexusID});
        }
        else {
            $id        = $ps->{'ID'};
            $name      = $ps->{'Name'};
            $state     = $ps->{'State'};
            $status    = $ps->{'Status'};
            $encl_id   = join q{:}, $ps->{ctrl}, $ps->{'encl_id'};
            $encl_name = $ps->{encl_name};
            $nexus     = join q{:}, $ps->{ctrl}, $ps->{'encl_id'}, $id;
        }

        next PS if blacklisted('encl_ps', $nexus);

        if ($status eq 'Ok') {
            # Ok
            report('storage',
                   sprintf('%s in enclosure %s [%s] is %s',
                           $name, $encl_id, $encl_name, $state),
                   $E_OK, $nexus);
        }
        else {
            # Default: anything that is not 'Ok' needs attention
            report('storage',
                   sprintf('%s in enclosure %s [%s] needs attention: %s',
                           $name, $encl_id, $encl_name, $state),
                   $status2nagios{$status}, $nexus);
        }
    }
    return;
}
2028
2029
2030#-----------------------------------------
2031# STORAGE: Check enclosure temperatures
2032#-----------------------------------------
sub check_enclosure_temp {
    # Reports the state of every temperature probe in the storage
    # enclosures, using SNMP when $snmp is set and omreport otherwise.
    # Each probe is passed to report() in the 'storage' category, and
    # performance data is stored in %perfdata when $opt{perfdata} is
    # defined. Takes no arguments and returns nothing.

    # Nothing to check without storage controllers
    return if $#controllers == -1;

    my $id        = undef;
    my $nexus     = undef;
    my $name      = undef;
    my $state     = undef;
    my $status    = undef;
    my $reading   = undef;
    my $max_warn  = undef;
    my $max_crit  = undef;
    my $encl_id   = undef;
    my $encl_name = undef;
    my @output    = ();

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.1'  => 'temperatureProbeNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.2'  => 'temperatureProbeName',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.4'  => 'temperatureProbeState',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.6'  => 'temperatureProbeUnit',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.9'  => 'temperatureProbeMaxWarning',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.4'  => 'temperatureConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.5'  => 'temperatureConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # No enclosure temperature probes is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        # One omreport call per enclosure; map_item() (defined
        # elsewhere) tags the collected rows with the controller and
        # enclosure they belong to.
        foreach my $enc (@enclosures) {
            push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
            map_item('ctrl', $enc->{ctrl}, \@output);
            map_item('encl_id', $enc->{id}, \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Textual names for the numeric SNMP probe state
    my %temp_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         4  => 'Offline',
         6  => 'Degraded',
         9  => 'Inactive',
         21 => 'Missing',
        );

    # Check temperature probes on each of the enclosures
  TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            $id        = $out->{temperatureProbeNumber} - 1;
            $name      = $out->{temperatureProbeName};
            $state     = $temp_state{$out->{temperatureProbeState}};
            $status    = $snmp_status{$out->{temperatureProbeComponentStatus}};
            $reading   = $out->{temperatureProbeCurValue};
            $max_warn  = $out->{temperatureProbeMaxWarning};
            $max_crit  = $out->{temperatureProbeMaxCritical};
            $encl_id   = $out->{temperatureConnectionEnclosureNumber} - 1;
            $encl_name = $out->{temperatureConnectionEnclosureName};
            $nexus     = convert_nexus($out->{temperatureProbeNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            # omreport appends the unit to the values; strip it
            $reading   = $out->{'Reading'};                   $reading  =~ s{\s*C}{}xms;
            $max_warn  = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms;
            $max_crit  = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms;
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next TEMP if blacklisted('encl_temp', $nexus);

        # Default
        if ($status ne 'Ok') {
            # The state comes first, then the reading with its unit
            my $msg = sprintf '%s in enclosure %s [%s] is %s at %s C (%s max)',
              $name, $encl_id, $encl_name, $state, $reading, $max_crit;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)',
              $name, $encl_id, $encl_name, $reading, $max_crit;
            report('storage', $msg, $E_OK, $nexus);
        }

        # Collect performance data
        if (defined $opt{perfdata}) {
            # Shorten "Temperature Probe N" to "temp_N" for the label
            $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
            my $pkey = "enclosure_${encl_id}_${name}";
            my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
            $perfdata{$pkey} = $pval;
        }
    }
    return;
}
2146
2147
2148#-----------------------------------------
2149# STORAGE: Check enclosure management modules (EMM)
2150#-----------------------------------------
sub check_enclosure_emms {
    # Reports the state of the enclosure management modules (EMMs) of
    # all storage enclosures through report() in the 'storage'
    # category. Data comes from SNMP when $snmp is set, otherwise from
    # omreport. Takes no arguments and returns nothing.

    # Without storage controllers there is nothing to look at
    return if $#controllers == -1;

    my @modules = ();

    if (!$snmp) {
        # Query omreport once per enclosure and let map_item() (defined
        # elsewhere) tag the rows with controller and enclosure data
        foreach my $encl (@enclosures) {
            push @modules, @{ run_omreport("storage enclosure controller=$encl->{ctrl} enclosure=$encl->{id} info=emms") };
            map_item('ctrl', $encl->{ctrl}, \@modules);
            map_item('encl_id', $encl->{id}, \@modules);
            map_item('encl_name', $encl->{name}, \@modules);
        }
    }
    else {
        my %emms_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.1'  => 'enclosureManagementModuleNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.2'  => 'enclosureManagementModuleName',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.4'  => 'enclosureManagementModuleState',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.4'  => 'enclosureManagementModuleConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.5'  => 'enclosureManagementModuleConnectionEnclosureNumber',
            );
        my $table = $snmp_session->get_entries(-columns => [keys %emms_oid]);

        # No enclosure EMMs is OK
        return if !defined $table;

        @modules = @{ get_snmp_output($table, \%emms_oid) };
    }

    # Textual names for the numeric SNMP module state
    my %emms_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         5  => 'Not Installed',
         6  => 'Degraded',
         21 => 'Missing',
        );

    # Check the EMMs on each of the enclosures
  EMM:
    foreach my $emm (@modules) {
        my ($id, $name, $state, $status, $encl_id, $encl_name, $nexus);

        if ($snmp) {
            $id        = $emm->{enclosureManagementModuleNumber} - 1;
            $name      = $emm->{enclosureManagementModuleName};
            $state     = $emms_state{$emm->{enclosureManagementModuleState}};
            $status    = $snmp_status{$emm->{enclosureManagementModuleComponentStatus}};
            $encl_id   = $emm->{enclosureManagementModuleConnectionEnclosureNumber} - 1;
            $encl_name = $emm->{enclosureManagementModuleConnectionEnclosureName};
            $nexus     = convert_nexus($emm->{enclosureManagementModuleNexusID});
        }
        else {
            ($id, $name, $state, $status) = @{$emm}{'ID', 'Name', 'State', 'Status'};
            $encl_id   = join q{:}, $emm->{ctrl}, $emm->{'encl_id'};
            $encl_name = $emm->{encl_name};
            $nexus     = join q{:}, $emm->{ctrl}, $emm->{'encl_id'}, $id;
        }

        next EMM if blacklisted('encl_emm', $nexus);

        # Pick wording and severity depending on whether the module is Ok
        my $healthy = $status eq 'Ok';
        my $format  = $healthy
          ? '%s in enclosure %s [%s] is %s'
          : '%s in enclosure %s [%s] needs attention: %s';
        my $level   = $healthy ? $E_OK : $status2nagios{$status};

        my $msg = sprintf $format, $name, $encl_id, $encl_name, $state;
        report('storage', $msg, $level, $nexus);
    }
    return;
}
2241
2242
2243#-----------------------------------------
2244# CHASSIS: Check memory modules
2245#-----------------------------------------
sub check_memory {
    # Reports the status of every populated memory module through
    # report() in the 'chassis' category and counts the modules in
    # $count{dimm}. Data comes from SNMP when $snmp is set, otherwise
    # from omreport. An SNMP failure prints an error and exits with
    # $E_UNKNOWN. Takes no arguments and returns nothing.
    my $index    = undef;
    my $status   = undef;
    my $location = undef;
    my $size     = undef;
    my $modes    = undef;
    my @failures = ();
    my @output   = ();

    if ($snmp) {
        my %dimm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1'  => 'memoryDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1'  => 'memoryDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1'  => 'memoryDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%dimm_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis memory") };
    }

    # Note: These values are bit masks, so combination values are
    # possible. If value is 0 (zero), memory device has no faults.
    my %failure_mode
      = (
         1  => 'ECC single bit correction warning rate exceeded',
         2  => 'ECC single bit correction failure rate exceeded',
         4  => 'ECC multibit fault encountered',
         8  => 'ECC single bit correction logging disabled',
         16 => 'device disabled because of spare activation',
        );

  DIMM:
    foreach my $out (@output) {
        @failures = ();  # Initialize
        if ($snmp) {
            $index    = $out->{memoryDeviceIndex};
            $status   = $snmp_status{$out->{memoryDeviceStatus}};
            $location = $out->{memoryDeviceLocationName};
            $size     = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
            $modes    = $out->{memoryDeviceFailureModes};
            if ($modes > 0) {
                # Walk the masks in numeric (bit) order so the failure
                # list is stable and reads from lowest to highest bit
                foreach my $mask (sort { $a <=> $b } keys %failure_mode) {
                    if (($modes & (0 + $mask)) != 0) { push @failures, $failure_mode{$mask}; }
                }
            }
        }
        else {
            # Empty slots get an undefined index and are skipped below
            my $unoccupied = defined $out->{'Type'} && $out->{'Type'} eq '[Not Occupied]';
            $index    = $unoccupied ? undef : $out->{'Index'};
            $status   = $out->{'Status'};
            $location = $out->{'Connector Name'};
            $size     = $out->{'Size'};
            if (defined $size) {
                $size =~ s{\s\s}{ }gxms;
            }
            # Run 'omreport chassis memory index=X' to get the failures
            if ($status ne 'Ok' && defined $index) {
                foreach my $line (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
                    if ($line =~ m/\A Failures; (.+?) \z/xms) {
                        # Drop the last character, then split the
                        # remainder into separate failures on "."
                        chop(my $fail = $1);
                        push @failures, split m{\.}xms, $fail;
                    }
                }
            }
        }
        # Trim surrounding whitespace from the location
        $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;

        # Ignore empty memory slots. This is done before the blacklist
        # lookup so that blacklisted() never sees an undefined index.
        next DIMM if !defined $index;

        next DIMM if blacklisted('dimm', $index);
        $count{dimm}++;

        if ($status ne 'Ok') {
            my $msg = undef;
            if (scalar @failures == 0) {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
                  $index, $location, $size, $status;
            }
            else {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
                  $index, $location, $size, (join q{, }, @failures);
            }

            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'Memory module %d [%s, %s] is %s',
              $index, $location, $size, $status;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2352
2353
2354#-----------------------------------------
2355# CHASSIS: Check fans
2356#-----------------------------------------
sub check_fans {
    # Reports every chassis fan through report() in the 'chassis'
    # category, counts the fans in $count{fan} and stores performance
    # data in %perfdata when $opt{perfdata} is defined. Data comes from
    # SNMP when $snmp is set, otherwise from omreport. Returns 0 when
    # the SNMP table is missing on a blade, nothing otherwise.
    my @fans = ();

    if (!$snmp) {
        @fans = @{ run_omreport("$omopt_chassis fans") };
    }
    else {
        my %cool_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.12.1.2.1'  => 'coolingDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.700.12.1.5.1'  => 'coolingDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.700.12.1.6.1'  => 'coolingDeviceReading',
             '1.3.6.1.4.1.674.10892.1.700.12.1.8.1'  => 'coolingDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
            );
        my $table = $snmp_session->get_entries(-columns => [keys %cool_oid]);

        if (!defined $table) {
            # A missing cooling table is tolerated on blades only
            return 0 if $blade;

            printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @fans = @{ get_snmp_output($table, \%cool_oid) };
    }

  FAN:
    foreach my $fan (@fans) {
        my ($index, $status, $reading, $location, $max_crit, $max_warn);

        if ($snmp) {
            $index    = $fan->{coolingDeviceIndex};
            $status   = $snmp_probestatus{$fan->{coolingDeviceStatus}};
            $reading  = $fan->{coolingDeviceReading};
            $location = $fan->{coolingDeviceLocationName};

            # Thresholds that are not reported default to 0
            $max_crit = 0;
            if (exists $fan->{coolingDeviceUpperCriticalThreshold}) {
                $max_crit = $fan->{coolingDeviceUpperCriticalThreshold};
            }
            $max_warn = 0;
            if (exists $fan->{coolingDeviceUpperNonCriticalThreshold}) {
                $max_warn = $fan->{coolingDeviceUpperNonCriticalThreshold};
            }
        }
        else {
            ($index, $status, $reading, $location)
              = @{$fan}{'Index', 'Status', 'Reading', 'Probe Name'};

            # omreport prints '[N/A]' for thresholds that are not set
            $max_crit = $fan->{'Maximum Failure Threshold'} ne '[N/A]'
              ? $fan->{'Maximum Failure Threshold'} : 0;
            $max_warn = $fan->{'Maximum Warning Threshold'} ne '[N/A]'
              ? $fan->{'Maximum Warning Threshold'} : 0;

            # Keep only the leading digits of each value
            foreach my $value ($reading, $max_warn, $max_crit) {
                $value =~ s{\A (\d+).* \z}{$1}xms;
            }
        }

        next FAN if blacklisted('fan', $index);
        $count{fan}++;

        if ($status eq 'Ok') {
            my $msg = sprintf 'Chassis fan %d [%s]: %s',
              $index, $location, $reading;
            report('chassis', $msg, $E_OK, $index);
        }
        else {
            my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
              $index, $location, $status;
            # SNMP probe statuses have their own severity table
            my $level = $status2nagios{$status};
            if ($snmp) {
                $level = $probestatus2nagios{$status};
            }
            report('chassis', $msg, $level, $index);
        }

        # Collect performance data
        next FAN if !defined $opt{perfdata};

        my $label = lc $location;
        $label =~ s{\s}{_}gxms;
        $label =~ s{proc_}{cpu#}xms;
        my $pkey = join q{_}, 'fan', $index, $label;
        $perfdata{$pkey} = join q{;}, "${reading}RPM", $max_warn, $max_crit;
    }
    return;
}
2446
2447
2448#-----------------------------------------
2449# CHASSIS: Check power supplies
2450#-----------------------------------------
sub check_powersupplies {
    # Reports every chassis power supply through report() in the
    # 'chassis' category and counts them in $count{power}. Data comes
    # from SNMP when $snmp is set, otherwise from omreport. Returns 0
    # when SNMP yields no power supply table, nothing otherwise.
    my $index    = undef;
    my $status   = undef;
    my $type     = undef;
    my $err_type = undef;
    my $state    = undef;
    my @states   = ();
    my @output   = ();

    if ($snmp) {
        my %ps_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.12.1.2.1'  => 'powerSupplyIndex',
             '1.3.6.1.4.1.674.10892.1.600.12.1.5.1'  => 'powerSupplyStatus',
             '1.3.6.1.4.1.674.10892.1.600.12.1.7.1'  => 'powerSupplyType',
             '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
             '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # No instrumented PSU is OK (blades, low-end servers)
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%ps_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
    }

    my %ps_type
      = (
         1  => 'Other',
         2  => 'Unknown',
         3  => 'Linear',
         4  => 'Switching',
         5  => 'Battery',
         6  => 'Uninterruptible Power Supply',
         7  => 'Converter',
         8  => 'Regulator',
         9  => 'AC',
         10 => 'DC',
         11 => 'VRM',
        );

    # Bit masks; several can be set in the sensor state at once
    my %ps_state
      = (
         1  => 'Presence detected',
         2  => 'Failure detected',
         4  => 'Predictive Failure',
         8  => 'AC lost',
         16 => 'AC lost or out-of-range',
         32 => 'AC out-of-range but present',
         64 => 'Configuration error',
        );

    my %ps_config_error_type
      = (
         1 => 'Vendor mismatch',
         2 => 'Revision mismatch',
         3 => 'Processor missing',
        );

  PS:
    foreach my $out (@output) {
        if ($snmp) {
            @states = ();  # contains states for the PS

            $index    = $out->{powerSupplyIndex} - 1;
            $status   = $snmp_status{$out->{powerSupplyStatus}};
            $type     = $ps_type{$out->{powerSupplyType}};
            $err_type = defined $out->{powerSupplyConfigurationErrorType}
              ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;

            # Get the combined state from the sensor state bit field.
            # Both operands are forced numeric so that Perl never falls
            # back to string-wise "&", and the masks are walked in
            # numeric order so the states read from lowest to highest bit.
            my $sensor = defined $out->{powerSupplySensorState}
              ? 0 + $out->{powerSupplySensorState} : 0;
            foreach my $mask (sort { $a <=> $b } keys %ps_state) {
                if (($sensor & (0 + $mask)) != 0) {
                    push @states, $ps_state{$mask};
                }
            }

            # If configuration error, also include the error type
            if (defined $err_type) {
                push @states, $err_type;
            }

            # Finally, construct the state string
            $state = join q{, }, @states;
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $type   = $out->{'Type'};
            $state  = $out->{'Online Status'};
        }

        next PS if blacklisted('ps', $index);
        $count{power}++;

        if ($status ne 'Ok') {
            my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
              $index, $type, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        else {
            my $msg = sprintf 'Power Supply %d [%s]: %s',
              $index, $type, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2562
2563
2564#-----------------------------------------
2565# CHASSIS: Check temperatures
2566#-----------------------------------------
sub check_temperatures {
    # Reports every chassis temperature probe through report() in the
    # 'chassis' category, counts the probes in $count{temp} and stores
    # performance data in %perfdata when $opt{perfdata} is defined.
    # Custom thresholds from custom_temperature_thresholds() are
    # checked before the thresholds reported by the hardware. Data
    # comes from SNMP when $snmp is set, otherwise from omreport. An
    # SNMP failure prints an error and exits with $E_UNKNOWN.
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my $max_crit = undef;
    my $max_warn = undef;
    my $min_warn = undef;
    my $min_crit = undef;
    my $type     = undef;
    my $discrete = undef;
    my @output   = ();

    # Getting custom temperature thresholds (user option)
    my %warn_threshold = %{ custom_temperature_thresholds('w') };
    my %crit_threshold = %{ custom_temperature_thresholds('c') };

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.20.1.2.1'  => 'temperatureProbeIndex',
             '1.3.6.1.4.1.674.10892.1.700.20.1.5.1'  => 'temperatureProbeStatus',
             '1.3.6.1.4.1.674.10892.1.700.20.1.6.1'  => 'temperatureProbeReading',
             '1.3.6.1.4.1.674.10892.1.700.20.1.7.1'  => 'temperatureProbeType',
             '1.3.6.1.4.1.674.10892.1.700.20.1.8.1'  => 'temperatureProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
            );
        # this didn't work well for some reason
        #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # Getting values using the table
        my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
        my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis temps") };
    }

    my %probe_type
      = (
         1  => 'Other',      # type is other than following values
         2  => 'Unknown',    # type is unknown
         3  => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
         16 => 'Discrete',   # type is temperature probe with discrete reading
        );

  TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            # SNMP values are divided by 10 to get degrees. Every
            # optional column is guarded with exists so that a missing
            # threshold shows up as '[N/A]' (the marker the checks
            # below test for) instead of silently turning into 0.
            $index    = $out->{temperatureProbeIndex} - 1;
            $status   = $snmp_probestatus{$out->{temperatureProbeStatus}};
            $reading  = exists $out->{temperatureProbeReading}
              ? $out->{temperatureProbeReading} / 10 : 0;
            $location = $out->{temperatureProbeLocationName};
            $max_crit = exists $out->{temperatureProbeUpperCriticalThreshold}
              ? $out->{temperatureProbeUpperCriticalThreshold} / 10 : '[N/A]';
            $max_warn = exists $out->{temperatureProbeUpperNonCriticalThreshold}
              ? $out->{temperatureProbeUpperNonCriticalThreshold} / 10 : '[N/A]';
            $min_crit = exists $out->{temperatureProbeLowerCriticalThreshold}
              ? $out->{temperatureProbeLowerCriticalThreshold} / 10 : '[N/A]';
            $min_warn = exists $out->{temperatureProbeLowerNonCriticalThreshold}
              ? $out->{temperatureProbeLowerNonCriticalThreshold} / 10 : '[N/A]';
            $type     = $probe_type{$out->{temperatureProbeType}};
            $discrete = exists $out->{temperatureProbeDiscreteReading}
              ? $out->{temperatureProbeDiscreteReading} : undef;
        }
        else {
            # omreport prints values like "23.0 C"; strip the suffix
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
            $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
            $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
            $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;
            # A reading that is not a plain integer is treated as discrete
            $type     = $reading =~ m{\A\d+\z}xms ? 'AmbientESM' : 'Discrete';
            $discrete = $reading;
        }

        next TEMP if blacklisted('temp', $index);
        $count{temp}++;

        if ($type eq 'Discrete') {
            my $msg = sprintf 'Temperature probe %d (%s): is %s',
              $index, $location, $discrete;
            my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
            report('chassis', $msg, $err, $index);
        }
        else {
            # First check according to custom thresholds
            if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
                # Custom critical MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{max};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
                # Custom warning MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{max};
                report('chassis', $msg, $E_WARNING, $index);
            }
            elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
                # Custom critical MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{min};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
                # Custom warning MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{min};
                report('chassis', $msg, $E_WARNING, $index);
            }
            # Then the thresholds reported by the hardware
            elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            # Ok
            else {
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
                  $index, $location, $reading;
                if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
                    $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
                }
                else {
                    $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
                      $min_warn, $min_crit, $max_warn, $max_crit;
                }
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }

            # Collect performance data
            if (defined $opt{perfdata}) {
                my $pname = lc $location;
                $pname =~ s{\s}{_}gxms;
                $pname =~ s{_temp\z}{}xms;
                $pname =~ s{proc_}{cpu#}xms;
                my $pkey = join q{_}, 'temp', $index, $pname;
                # Thresholds in performance data must be numeric or
                # empty, so the '[N/A]' marker is emitted as empty
                my @limits = map { (defined $_ && $_ ne '[N/A]') ? $_ : q{} }
                  ($max_warn, $max_crit);
                my $pval = join q{;}, "${reading}C", @limits;
                $perfdata{$pkey} = $pval;
            }
        }
    }
    return;
}
2743
2744
2745#-----------------------------------------
2746# CHASSIS: Check processors
2747#-----------------------------------------
sub check_processors {
    # Reports every populated CPU socket through report() in the
    # 'chassis' category and counts the CPUs in $count{cpu}. Data comes
    # from SNMP when $snmp is set, otherwise from omreport. An SNMP
    # failure on both OID sets prints an error and exits with
    # $E_UNKNOWN. Takes no arguments and returns nothing.
    my $index   = undef;
    my $status  = undef;
    my $state   = undef;
    my $oid_ver = 'new';
    my @output  = ();

    if ($snmp) {

        # NOTE: For some reason, older models don't have the
        # "Processor Device Status" OIDs. We first check the newer
        # (preferred) OIDs, and if that doesn't work, check the "old"
        # OIDs.

        my %cpu_oid_new   # for newer models
          = (
             '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
            );

        my %cpu_oid_old   # for older models
          = (
             '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
            );

        my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_new]);

        if (!defined $result) {
            $oid_ver = 'old';
            $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_old]);
        }

        if (!defined $result) {
            printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        if ($oid_ver eq 'new') {
            @output = @{ get_snmp_output($result, \%cpu_oid_new) };
        }
        else {
            @output = @{ get_snmp_output($result, \%cpu_oid_old) };
        }
    }
    else {
        @output = @{ run_omreport("$omopt_chassis processors") };
    }

    my %cpu_state
      = (
         1 => 'Other',         # other than following values
         2 => 'Unknown',       # unknown
         3 => 'Enabled',       # enabled
         4 => 'User Disabled', # disabled by user via BIOS setup
         5 => 'BIOS Disabled', # disabled by BIOS (POST error)
         6 => 'Idle',          # idle
        );

    # Bit masks; several can be set in the status reading at once
    my %cpu_reading
      = (
         1    => 'Internal Error',      # Internal Error
         2    => 'Thermal Trip',        # Thermal Trip
         32   => 'Configuration Error', # Configuration Error
         128  => 'Present',             # Processor Present
         256  => 'Disabled',            # Processor Disabled
         512  => 'Terminator Present',  # Terminator Present
         1024 => 'Throttled',           # Processor Throttled
        );


  CPU:
    foreach my $out (@output) {
        if ($snmp) {
            if ($oid_ver eq 'new') {
                my @states = ();  # contains states for the CPU
                $index  = $out->{processorDeviceStatusIndex} - 1;
                $status = $snmp_status{$out->{processorDeviceStatusStatus}};

                # Get the combined state from the StatusReading OID.
                # Both operands are forced numeric so that Perl never
                # falls back to string-wise "&", and the masks are
                # walked in numeric order so the states read from
                # lowest to highest bit.
                my $bits = defined $out->{processorDeviceStatusReading}
                  ? 0 + $out->{processorDeviceStatusReading} : 0;
                foreach my $mask (sort { $a <=> $b } keys %cpu_reading) {
                    if (($bits & (0 + $mask)) != 0) {
                        push @states, $cpu_reading{$mask};
                    }
                }

                # Finally, create the state string
                $state = join q{, }, @states;
            }
            else {
                $index  = $out->{processorDeviceIndex} - 1;
                $status = $snmp_status{$out->{processorDeviceStatus}};
                $state  = $cpu_state{$out->{processorDeviceStatusState}};
            }
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $state  = $out->{'State'};
        }

        next CPU if blacklisted('cpu', $index);

        # Ignore unoccupied CPU slots (omreport)
        next CPU if (defined $out->{'Processor Manufacturer'}
                     and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
          or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');

        # Ignore unoccupied CPU slots (snmp)
        if ($snmp and exists $out->{processorDeviceStatusReading}
            and $out->{processorDeviceStatusReading} == 0) {
            next CPU;
        }

        $count{cpu}++;

        # Default
        if ($status ne 'Ok') {
            my $msg = sprintf 'CPU %d needs attention: %s',
              $index, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'CPU %d is %s',
              $index, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2882
2883
2884#-----------------------------------------
2885# CHASSIS: Check voltage probes
2886#-----------------------------------------
sub check_volts {
    # Reports every chassis voltage probe through report() in the
    # 'chassis' category and counts the probes in $count{volt}. Data
    # comes from SNMP when $snmp is set, otherwise from omreport. An
    # SNMP failure prints an error and exits with $E_UNKNOWN. Takes no
    # arguments and returns nothing.
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my @output   = ();

    if ($snmp) {
        my %volt_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.20.1.2.1'  => 'voltageProbeIndex',
             '1.3.6.1.4.1.674.10892.1.600.20.1.5.1'  => 'voltageProbeStatus',
             '1.3.6.1.4.1.674.10892.1.600.20.1.6.1'  => 'voltageProbeReading',
             '1.3.6.1.4.1.674.10892.1.600.20.1.8.1'  => 'voltageProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
            );

        my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
        my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%volt_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis volts") };
    }

    my %volt_discrete_reading
      = (
         1 => 'Good',
         2 => 'Bad',
        );

  VOLT:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{voltageProbeIndex} - 1;
            # Decode with the probe status table: the result is looked
            # up in %probestatus2nagios below, the same pairing the fan
            # and temperature probe checks use.
            $status   = $snmp_probestatus{$out->{voltageProbeStatus}};
            # Probes without a numeric reading report a discrete value
            $reading  = exists $out->{voltageProbeReading}
              ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
              : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
            $location = $out->{voltageProbeLocationName};
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
        }

        next VOLT if blacklisted('volt', $index);
        $count{volt}++;

        my $msg = sprintf 'Voltage sensor %d [%s] is %s',
          $index, $location, $reading;
        my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
        report('chassis', $msg, $err, $index);
    }
    return;
}
2952
2953
#-----------------------------------------
# CHASSIS: Check batteries
#-----------------------------------------
# Reports the state of every battery probe. Probe data is fetched via
# SNMP when $snmp is set, otherwise from 'omreport chassis batteries'.
# Returns 0 if the SNMP query gives no result (no batteries), and
# nothing otherwise.
sub check_batteries {
    my @probes = ();

    if ($snmp) {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
             '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
             '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
             '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No batteries is OK
        return 0 if !defined $result;

        @probes = @{ get_snmp_output($result, \%bat_oid) };
    }
    else {
        @probes = @{ run_omreport("$omopt_chassis batteries") };
    }

    # Text for the numeric SNMP battery reading
    my %bat_reading
      = (
         1 => 'Predictive Failure',
         2 => 'Failed',
         4 => 'Presence Detected',
        );

  BATTERY:
    foreach my $probe (@probes) {
        my ($index, $status, $reading, $location);

        if ($snmp) {
            # SNMP index is shifted down by one before use
            $location = $probe->{batteryLocationName};
            $reading  = $bat_reading{$probe->{batteryReading}};
            $status   = $snmp_status{$probe->{batteryStatus}};
            $index    = $probe->{batteryIndex} - 1;
        }
        else {
            $location = $probe->{'Probe Name'};
            $reading  = $probe->{'Reading'};
            $status   = $probe->{'Status'};
            $index    = $probe->{'Index'};
        }

        next BATTERY if blacklisted('bp', $index);
        $count{bat}++;

        report('chassis',
               sprintf('Battery probe %d [%s] is %s', $index, $location, $reading),
               $status2nagios{$status},
               $index);
    }
    return;
}
3014
3015
#-----------------------------------------
# CHASSIS: Check amperage probes (power monitoring)
#-----------------------------------------
# Reports the state of every amperage (power monitoring) probe and,
# when the perfdata option is set, stores readings in %perfdata.
# Probe data is fetched via SNMP when $snmp is set, otherwise from
# 'omreport chassis pwrmonitoring'. Returns 0 if the SNMP query gives
# no result (no power monitoring), and nothing otherwise.
sub check_pwrmonitoring {
    my @output = ();

    if ($snmp) {
        my %amp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex',
             '1.3.6.1.4.1.674.10892.1.600.30.1.5.1' => 'amperageProbeStatus',
             '1.3.6.1.4.1.674.10892.1.600.30.1.6.1' => 'amperageProbeReading',
             '1.3.6.1.4.1.674.10892.1.600.30.1.7.1' => 'amperageProbeType',
             '1.3.6.1.4.1.674.10892.1.600.30.1.8.1' => 'amperageProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %amp_oid]);

        # No pwrmonitoring is OK
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%amp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis pwrmonitoring") };
    }

    my %amp_type   # Amperage probe types
      = (
         1 => 'amperageProbeTypeIsOther',            # other than following values
         2 => 'amperageProbeTypeIsUnknown',          # unknown
         3 => 'amperageProbeTypeIs1Point5Volt',      # 1.5 amperage probe
         4 => 'amperageProbeTypeIs3Point3volt',      # 3.3 amperage probe
         5 => 'amperageProbeTypeIs5Volt',            # 5 amperage probe
         6 => 'amperageProbeTypeIsMinus5Volt',       # -5 amperage probe
         7 => 'amperageProbeTypeIs12Volt',           # 12 amperage probe
         8 => 'amperageProbeTypeIsMinus12Volt',      # -12 amperage probe
         9 => 'amperageProbeTypeIsIO',               # I/O probe
         10 => 'amperageProbeTypeIsCore',            # Core probe
         11 => 'amperageProbeTypeIsFLEA',            # FLEA (standby) probe
         12 => 'amperageProbeTypeIsBattery',         # Battery probe
         13 => 'amperageProbeTypeIsTerminator',      # SCSI Termination probe
         14 => 'amperageProbeTypeIs2Point5Volt',     # 2.5 amperage probe
         15 => 'amperageProbeTypeIsGTL',             # GTL (ground termination logic) probe
         16 => 'amperageProbeTypeIsDiscrete',        # amperage probe with discrete reading
         23 => 'amperageProbeTypeIsPowerSupplyAmps', # Power Supply probe with reading in Amps
         24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts
         25 => 'amperageProbeTypeIsSystemAmps',      # System probe with reading in Amps
         26 => 'amperageProbeTypeIsSystemWatts',     # System probe with reading in Watts
        );

    my %amp_discrete
      = (
         1 => 'Good',
         2 => 'Bad',
        );

    my %amp_unit
      = (
         'amperageProbeTypeIsPowerSupplyAmps'  => 'hA', # tenths of Amps
         'amperageProbeTypeIsSystemAmps'       => 'hA', # tenths of Amps
         'amperageProbeTypeIsPowerSupplyWatts' => 'W',  # Watts
         'amperageProbeTypeIsSystemWatts'      => 'W',  # Watts
         'amperageProbeTypeIsDiscrete'         => q{},  # discrete reading, no unit
        );

  AMP:
    foreach my $out (@output) {
        my ($index, $status, $reading, $location, $max_crit, $max_warn, $unit);

        if ($snmp) {
            # Resolve the probe type once. An unmapped type becomes the
            # empty string, so the comparisons below never see undef and
            # the unit falls back to 'mA'.
            my $type_id = $out->{amperageProbeType};
            my $type    = (defined $type_id && exists $amp_type{$type_id})
                        ? $amp_type{$type_id} : q{};

            $index    = $out->{amperageProbeIndex} - 1;
            $status   = $snmp_status{$out->{amperageProbeStatus}};
            $reading  = $type eq 'amperageProbeTypeIsDiscrete'
                      ? $amp_discrete{$out->{amperageProbeDiscreteReading}}
                      : $out->{amperageProbeReading};
            $location = $out->{amperageProbeLocationName};
            $max_crit = exists $out->{amperageProbeUpperCriticalThreshold}
                      ? $out->{amperageProbeUpperCriticalThreshold} : 0;
            $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold}
                      ? $out->{amperageProbeUpperNonCriticalThreshold} : 0;
            $unit     = exists $amp_unit{$type} ? $amp_unit{$type} : 'mA';

            # Readings in tenths of Amps are converted to Amps
            if ($unit eq 'hA') {
                $reading  /= 10;
                $max_crit /= 10;
                $max_warn /= 10;
                $unit = 'A';
            }
        }
        else {
            $index = $out->{'Index'};
            next AMP if $index !~ m/^\d+$/x;
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Failure Threshold'} ne '[N/A]'
                      ? $out->{'Failure Threshold'} : 0;
            $max_warn = $out->{'Warning Threshold'} ne '[N/A]'
                      ? $out->{'Warning Threshold'} : 0;

            # Split "<number> <unit>" into its parts. Only trust the
            # captures when the pattern really matched; otherwise leave
            # the reading untouched and use an empty unit.
            if ($reading =~ m{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}xms) {
                ($reading, $unit) = ($1, $2);
            }
            else {
                $unit = q{};
            }
            $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
            $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
        }

        next AMP if blacklisted('pm', $index);
        next AMP if $index !~ m{\A \d+ \z}xms;
        $count{amp}++;

        # Four conversions, four arguments
        my $msg = sprintf 'Amperage probe %d [%s] reads %s %s',
          $index, $location, $reading, $unit;
        report('chassis', $msg, $status2nagios{$status}, $index);

        # Collect performance data
        if (defined $opt{perfdata}) {
            next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
            my $pname = lc $location;
            $pname =~ s{\s}{_}gxms;
            my $pkey = join q{_}, 'pwr_mon', $index, $pname;
            my $pval = join q{;}, "$reading$unit", $max_warn, $max_crit;
            $perfdata{$pkey} = $pval;
        }
    }

    # Collect EXTRA performance data not found at first run. This is a
    # rather ugly hack: the "Location;Reading" sections of the ssv
    # output are parsed separately and given indexes not already used.
    if (defined $opt{perfdata} && !$snmp) {
        my $found      = 0;  # true while inside a "Location;Reading" section
        my $next_index = 0;
        my %used       = ();

        # find used indexes
        foreach (keys %perfdata) {
            if (m/\A pwr_mon_(\d+)/xms) {
                $used{$1} = 1;
            }
        }

      AMP2:
        foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
            chop $line;
            if ($line eq 'Location;Reading') {
                $found = 1;
                next AMP2;
            }
            if ($line eq q{}) {
                $found = 0;
                next AMP2;
            }
            if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) {
                my $aname = lc $1;
                my $aval  = $2;
                my $aunit = $3;
                $aname =~ s{\s}{_}gxms;

                # don't use an existing index
                while (exists $used{$next_index}) { ++$next_index; }

                $perfdata{"pwr_mon_${next_index}_${aname}"} = "$aval$aunit;0;0";
                ++$next_index;
            }
        }
    }

    return;
}
3190
3191
#-----------------------------------------
# CHASSIS: Check intrusion
#-----------------------------------------
# Reports the state of every chassis intrusion sensor. A status other
# than 'Ok' is reported as a warning. Returns 0 if the SNMP query gives
# no result (no intrusion sensors), and nothing otherwise.
sub check_intrusion {
    my @sensors = ();

    if ($snmp) {
        my %int_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
             '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
             '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %int_oid]);

        # No intrusion is OK
        return 0 if !defined $result;

        @sensors = @{ get_snmp_output($result, \%int_oid) };
    }
    else {
        @sensors = @{ run_omreport("$omopt_chassis intrusion") };
    }

    my %int_reading
      = (
         1 => 'Not Breached',          # chassis not breached and no uncleared breaches
         2 => 'Breached',              # chassis currently breached
         3 => 'Breached Prior',        # chassis breached prior to boot and has not been cleared
         4 => 'Breach Sensor Failure', # intrusion sensor has failed
        );

  INTRUSION:
    foreach my $sensor (@sensors) {
        my ($index, $status, $reading);

        if ($snmp) {
            $reading = $int_reading{$sensor->{intrusionReading}};
            $status  = $snmp_status{$sensor->{intrusionStatus}};
            $index   = $sensor->{intrusionIndex} - 1;
        }
        else {
            $reading = $sensor->{'State'};
            $status  = $sensor->{'Status'};
            $index   = $sensor->{'Index'};
        }

        next INTRUSION if blacklisted('intr', $index);
        $count{intr}++;

        if ($status eq 'Ok') {
            report('chassis',
                   sprintf('Chassis intrusion %d detection: %s (%s)',
                           $index, $status, $reading),
                   $E_OK, $index);
        }
        else {
            # Anything but Ok counts as a detected intrusion
            report('chassis',
                   sprintf('Chassis intrusion %d detected: %s',
                           $index, $reading),
                   $E_WARNING, $index);
        }
    }
    return;
}
3257
3258
#-----------------------------------------
# CHASSIS: Check alert log
#-----------------------------------------
# Counts the alert log entries per severity (from 'omreport system
# alertlog') and reports a one-line summary. Does nothing in SNMP mode.
sub check_alertlog {
    # Not supported with SNMP
    if ($snmp) {
        return;
    }

    # Tally entries by severity
    foreach my $entry (@{ run_omreport("$omopt_system alertlog") }) {
        $count{alert}{$entry->{Severity}}++;
    }

    # Create error messages and set exit value if appropriate
    my $err = $count{alert}{'Critical'}     > 0 ? $E_CRITICAL
            : $count{alert}{'Non-Critical'} > 0 ? $E_WARNING
            :                                     0;

    report('other',
           sprintf('Alert log content: %d critical, %d non-critical, %d ok',
                   $count{alert}{'Critical'},
                   $count{alert}{'Non-Critical'},
                   $count{alert}{'Ok'}),
           $err);

    return;
}
3281
#-----------------------------------------
# CHASSIS: Check ESM log overall health
#-----------------------------------------
# Reports the overall health (fill grade) of the ESM log. The health
# value is read via SNMP when $snmp is set, otherwise from the
# "Health;<value>" line of 'omreport system esmlog -fmt ssv'. If the
# SNMP request fails, only the SNMP error is reported.
sub check_esmlog_health {
    my $health = 'Ok';

    if ($snmp) {
        my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
        if (!defined $result) {
            my $msg = sprintf 'SNMP ERROR [esmhealth]: %s',
              $snmp_session->error;
            report('other', $msg, $E_UNKNOWN);

            # Stop here: without a result there is no health value, and
            # carrying on would add a bogus "more than 80% full" message
            # on top of the SNMP error.
            return;
        }
        $health = $snmp_status{$result->{$systemStateEventLogStatus}};
    }
    else {
        foreach my $line (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
            if ($line =~ m/\A Health;(.+) \z/xms) {
                $health = $1;
                chop $health;  # drop the last character of the captured value
                last;
            }
        }
    }

    # If the overall health of the ESM log is other than "Ok", the
    # fill grade of the log is more than 80% and the log should be
    # cleared
    if ($health eq 'Ok') {
        report('other', 'ESM log health is OK (less than 80% full)', $E_OK);
    }
    elsif ($health eq 'Critical') {
        report('other', 'ESM log is 100% full!', $status2nagios{$health});
    }
    else {
        report('other', 'ESM log is more than 80% full', $status2nagios{$health});
    }

    return;
}
3326
#-----------------------------------------
# CHASSIS: Check ESM log
#-----------------------------------------
# Counts the ESM log entries per severity and reports a one-line
# summary. Entries come from SNMP when $snmp is set, otherwise from
# 'omreport system esmlog'. Returns without reporting if the SNMP
# query gives no result.
sub check_esmlog {
    if ($snmp) {
        my %esm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);

        # No entries is OK
        return if !defined $result;

        # Tally entries by (translated) severity
        foreach my $entry (@{ get_snmp_output($result, \%esm_oid) }) {
            $count{esm}{$snmp_status{$entry->{eventLogSeverityStatus}}}++;
        }
    }
    else {
        foreach my $entry (@{ run_omreport("$omopt_system esmlog") }) {
            $count{esm}{$entry->{Severity}}++;
        }
    }

    # Create error messages and set exit value if appropriate
    my $err = $count{esm}{'Critical'}     > 0 ? $E_CRITICAL
            : $count{esm}{'Non-Critical'} > 0 ? $E_WARNING
            :                                   0;

    report('other',
           sprintf('ESM log content: %d critical, %d non-critical, %d ok',
                   $count{esm}{'Critical'},
                   $count{esm}{'Non-Critical'},
                   $count{esm}{'Ok'}),
           $err);

    return;
}
3366
#
# Handy function for checking all storage components. Runs each
# storage check once, in a fixed order.
#
sub check_storage {
    my @storage_checks
      = (
         \&check_controllers,
         \&check_physical_disks,
         \&check_virtual_disks,
         \&check_cache_battery,
         \&check_connectors,
         \&check_enclosures,
         \&check_enclosure_fans,
         \&check_enclosure_pwr,
         \&check_enclosure_temp,
         \&check_enclosure_emms,
        );

    foreach my $run_check (@storage_checks) {
        $run_check->();
    }
    return;
}
3383
3384
3385
3386#---------------------------------------------------------------------
3387# Info functions
3388#---------------------------------------------------------------------
3389
#
# Fetch output from 'omreport chassis info', put in sysinfo hash.
# Sets $sysinfo{model} and $sysinfo{serial}. Does nothing if the
# command cannot be started.
#
sub get_omreport_chassis_info {
    open my $info_fh, '-|', "$omreport $omopt_chassis info -fmt ssv"
      or return;
    my @lines = <$info_fh>;
    close $info_fh;

  LINE:
    foreach my $line (@lines) {
        # Only the model and service tag lines are of interest
        next LINE
          if $line !~ m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if ($key eq 'Chassis Model' or $key eq 'Model') {
            $sysinfo{model} = $val;
        }
        if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') {
            $sysinfo{serial} = $val;
        }
    }
    return;
}
3412
#
# Fetch output from 'omreport chassis bios', put in sysinfo hash.
# Sets $sysinfo{bios} and $sysinfo{biosdate}. Does nothing if the
# command cannot be started.
#
sub get_omreport_chassis_bios {
    open my $bios_fh, '-|', "$omreport $omopt_chassis bios -fmt ssv"
      or return;
    my @lines = <$bios_fh>;
    close $bios_fh;

  LINE:
    foreach my $line (@lines) {
        next LINE if $line !~ m/;/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if ($key eq 'Version') {
            $sysinfo{bios} = $val;
        }
        if ($key eq 'Release Date') {
            $sysinfo{biosdate} = $val;
        }
    }
    return;
}
3431
#
# Fetch output from 'omreport system operatingsystem', put in sysinfo
# hash. Sets $sysinfo{osname} and $sysinfo{osver}. Does nothing if the
# command cannot be started.
#
sub get_omreport_system_operatingsystem {
    open my $os_fh, '-|', "$omreport $omopt_system operatingsystem -fmt ssv"
      or return;
    my @lines = <$os_fh>;
    close $os_fh;

    # Which sysinfo slot each interesting key goes into
    my %slot_for
      = (
         'Operating System'         => 'osname',
         'Operating System Version' => 'osver',
        );

  LINE:
    foreach my $line (@lines) {
        next LINE if $line !~ m/;/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if (exists $slot_for{$key}) {
            $sysinfo{$slot_for{$key}} = $val;
        }
    }
    return;
}
3454
#
# Fetch output from 'omreport about', put in sysinfo hash. Sets
# $sysinfo{om} from the "Version;<value>" line. Does nothing if the
# command cannot be started.
#
sub get_omreport_about {
    open my $about_fh, '-|', "$omreport about -fmt ssv"
      or return;
    my @lines = <$about_fh>;
    close $about_fh;

    foreach my $line (@lines) {
        if (my ($version) = $line =~ m/\A Version;(.+) \z/xms) {
            chomp $version;
            $sysinfo{om} = $version;
        }
    }
    return;
}
3471
#
# Fetch chassis info via SNMP, put in sysinfo hash. Sets
# $sysinfo{model} and $sysinfo{serial}; reports an UNKNOWN message if
# the SNMP table cannot be fetched.
#
sub get_snmp_chassis_info {
    my %chassis_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.10.1.9.1'  => 'chassisModelName',
         '1.3.6.1.4.1.674.10892.1.300.10.1.11.1' => 'chassisServiceTagName',
        );

    my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
    my $result = $snmp_session->get_table(-baseoid => $chassisInformationTable);

    if (!defined $result) {
        report('other',
               sprintf('SNMP ERROR getting chassis info: %s', $snmp_session->error),
               $E_UNKNOWN);
        return;
    }

    # Pick the wanted OIDs out of the table
  OID:
    foreach my $oid (keys %chassis_oid) {
        next OID if !exists $result->{$oid};

        if ($chassis_oid{$oid} eq 'chassisModelName') {
            $sysinfo{model} = $result->{$oid};
            $sysinfo{model} =~ s{\s+\z}{}xms;  # remove trailing whitespace
        }
        elsif ($chassis_oid{$oid} eq 'chassisServiceTagName') {
            $sysinfo{serial} = $result->{$oid};
        }
    }
    return;
}
3503
#
# Fetch BIOS info via SNMP, put in sysinfo hash. Sets $sysinfo{bios}
# and $sysinfo{biosdate} (rewritten from YYYYMMDD... to MM/DD/YYYY);
# reports an UNKNOWN message if the SNMP table cannot be fetched.
#
sub get_snmp_chassis_bios {
    my %bios_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
         '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
        );

    my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
    my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);

    if (!defined $result) {
        report('other',
               sprintf('SNMP ERROR getting BIOS info: %s', $snmp_session->error),
               $E_UNKNOWN);
        return;
    }

    # Pick the wanted OIDs out of the table
  OID:
    foreach my $oid (keys %bios_oid) {
        next OID if !exists $result->{$oid};

        if ($bios_oid{$oid} eq 'systemBIOSReleaseDateName') {
            $sysinfo{biosdate} = $result->{$oid};
            $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
        }
        elsif ($bios_oid{$oid} eq 'systemBIOSVersionName') {
            $sysinfo{bios} = $result->{$oid};
        }
    }
    return;
}
3535
#
# Fetch OS info via SNMP, put in sysinfo hash. Sets $sysinfo{osname}
# and $sysinfo{osver}; reports an UNKNOWN message if the SNMP table
# cannot be fetched.
#
sub get_snmp_system_operatingsystem {
    my %os_oid
      = (
         '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
         '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
        );

    my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
    my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);

    if (!defined $result) {
        report('other',
               sprintf('SNMP ERROR getting OS info: %s', $snmp_session->error),
               $E_UNKNOWN);
        return;
    }

    # Pick the wanted OIDs out of the table
  OID:
    foreach my $oid (keys %os_oid) {
        next OID if !exists $result->{$oid};

        if ($os_oid{$oid} eq 'operatingSystemOperatingSystemName') {
            $sysinfo{osname} = $result->{$oid};
        }
        elsif ($os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') {
            $sysinfo{osver} = $result->{$oid};
        }
    }
    return;
}
3566
#
# Fetch OMSA version via SNMP, put in sysinfo hash. Sets $sysinfo{om};
# reports an UNKNOWN message if the SNMP table cannot be fetched.
#
sub get_snmp_about {
    my %omsa_oid
      = (
         '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName',
        );
    my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
    my $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);

    if (!defined $result) {
        report('other',
               sprintf('SNMP ERROR getting OMSA info: %s', $snmp_session->error),
               $E_UNKNOWN);
        return;
    }

    # Pick the wanted OID out of the table
  OID:
    foreach my $oid (keys %omsa_oid) {
        next OID if !exists $result->{$oid};

        if ($omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') {
            $sysinfo{om} = $result->{$oid};
        }
    }
    return;
}
3591
#
# Collects some information about the system. Model and serial number
# are always fetched; BIOS, OS and OMSA information only when the
# selected options need them. Each piece comes from SNMP when $snmp is
# set, otherwise from omreport.
#
sub get_sysinfo
{
    # Get system model and serial number
    if ($snmp) {
        get_snmp_chassis_info();
    }
    else {
        get_omreport_chassis_info();
    }

    # Get BIOS information. Only if needed
    my $want_bios
      = $opt{okinfo} >= 1
      || $opt{debug}
      || (defined $opt{postmsg} && $opt{postmsg} =~ m/[%][bd]/xms);
    if ($want_bios) {
        if ($snmp) {
            get_snmp_chassis_bios();
        }
        else {
            get_omreport_chassis_bios();
        }
    }

    # Return now if debug
    return if $opt{debug};

    # Get OS information. Only if needed
    if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
        if ($snmp) {
            get_snmp_system_operatingsystem();
        }
        else {
            get_omreport_system_operatingsystem();
        }
    }

    # Get OMSA information. Only if needed
    if ($opt{okinfo} >= 3) {
        if ($snmp) {
            get_snmp_about();
        }
        else {
            get_omreport_about();
        }
    }

    return;
}
3622
3623
# Helper function for running omreport when the results are strictly
# name=value pairs. Takes the omreport arguments as a single string,
# runs omreport with '-fmt ssv' and returns a reference to a hash that
# maps the first field of each output line to the second. Lines that
# start with "Error" are reported as UNKNOWN.
sub run_omreport_info {
    my ($command) = @_;
    my %output = ();

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");

    # Parse output, store in hash
    foreach my $line (split /\n/xms, $rawtext) {
        if ($line =~ m/\A Error/xms) {
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than two fields
        next if $line !~ m/;/xms;

        # Split into an array (not a two-variable list) so that a
        # trailing empty field is dropped and the value stays undefined
        my @vals = split m/;/xms, $line;
        $output{$vals[0]} = $vals[1];
    }

    # Finally, return the collected information
    return \%output;
}
3648
# Get various firmware information (BMC, RAC). Fills in the 'bmc',
# 'bmc_fw', 'rac', 'rac_name' and 'rac_fw' entries of %sysinfo. Data
# comes from the SNMP firmware table when $snmp is set, otherwise from
# 'omreport chassis info'. Returns silently if the SNMP table is not
# available.
sub get_firmware_info {
    my @snmp_output = ();
    my %nrpe_output = ();

    if ($snmp) {
        my %fw_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.60.1.7.1'  => 'firmwareType',
             '1.3.6.1.4.1.674.10892.1.300.60.1.8.1'  => 'firmwareTypeName',
             '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
            );

        my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
        my $result = $snmp_session->get_table(-baseoid => $firmwareTable);

        # Some don't have this OID, this is ok
        if (!defined $result) {
            return;
        }

        @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
    }
    else {
        %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
    }

    my %fw_type  # Firmware types
      = (
         1  => 'other',                              # other than following values
         2  => 'unknown',                            # unknown
         3  => 'systemBIOS',                         # System BIOS
         4  => 'embeddedSystemManagementController', # Embedded System Management Controller
         5  => 'powerSupplyParallelingBoard',        # Power Supply Paralleling Board
         6  => 'systemBackPlane',                    # System (Primary) Backplane
         7  => 'powerVault2XXSKernel',               # PowerVault 2XXS Kernel
         8  => 'powerVault2XXSApplication',          # PowerVault 2XXS Application
         9  => 'frontPanel',                         # Front Panel Controller
         10 => 'baseboardManagementController',      # Baseboard Management Controller
         11 => 'hotPlugPCI',                         # Hot Plug PCI Controller
         12 => 'sensorData',                         # Sensor Data Records
         13 => 'peripheralBay',                      # Peripheral Bay Backplane
         14 => 'secondaryBackPlane',                 # Secondary Backplane for ESM 2 systems
         15 => 'secondaryBackPlaneESM3And4',         # Secondary Backplane for ESM 3 and 4 systems
         16 => 'rac',                                # Remote Access Controller
         17 => 'imc'                                 # Integrated Management Controller
        );


    if ($snmp) {
      FIRMWARE:
        foreach my $out (@snmp_output) {
            # Skip entries whose type is missing or not in the table
            # above, so the comparisons below never see undef
            my $type_id = $out->{firmwareType};
            next FIRMWARE if !defined $type_id or !exists $fw_type{$type_id};
            my $type = $fw_type{$type_id};

            if ($type eq 'baseboardManagementController') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
            }
            # The alternation is grouped so that both anchors apply to
            # both names: exactly 'rac' or exactly 'imc'
            elsif ($type =~ m{\A (?:rac|imc) \z}xms) {
                my $name = $out->{firmwareTypeName};
                $name =~ s/\s//gxms;
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_name'} = $name;
                $sysinfo{'rac_fw'}   = $out->{firmwareVersionName};
            }
        }
    }
    else {
        foreach my $key (keys %nrpe_output) {
            next if !defined $nrpe_output{$key};
            if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $nrpe_output{$key};
            }
            elsif ($key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
                my $name = "$1$2";
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_fw'}   = $nrpe_output{$key};
                $sysinfo{'rac_name'} = $name;
            }
        }
    }

    return;
}
3730
3731
3732
3733#=====================================================================
3734# Main program
3735#=====================================================================
3736
# Here we do the actual checking of components
# Check global status if applicable
$globalstatus = check_global() if $global;

# Do multiple selected checks. Each entry pairs a key of %check with
# the function to run when that check is enabled; the order of the
# list is the order in which the checks run.
{
    my @component_checks
      = (
         [ storage   => \&check_storage       ],
         [ memory    => \&check_memory        ],
         [ fans      => \&check_fans          ],
         [ power     => \&check_powersupplies ],
         [ temp      => \&check_temperatures  ],
         [ cpu       => \&check_processors    ],
         [ voltage   => \&check_volts         ],
         [ batteries => \&check_batteries     ],
         [ amperage  => \&check_pwrmonitoring ],
         [ intrusion => \&check_intrusion     ],
         [ alertlog  => \&check_alertlog      ],
         [ esmlog    => \&check_esmlog        ],
         [ esmhealth => \&check_esmlog_health ],
        );

    foreach my $entry (@component_checks) {
        my ($name, $run_check) = @{ $entry };
        $run_check->() if $check{$name};
    }
}
3757
3758
3759#---------------------------------------------------------------------
3760# Finish up
3761#---------------------------------------------------------------------
3762
# Counter variable: number of messages seen per nagios level
%nagios_alert_count = map { $_ => 0 } qw(OK WARNING CRITICAL UNKNOWN);

# Get system information
get_sysinfo();

# Get firmware info if requested via option
get_firmware_info() if $opt{okinfo} >= 1;

# Close SNMP session
$snmp_session->close if $snmp;
3784
# Print messages. In debug mode every collected message is printed in
# tables, grouped by storage, chassis and other. Otherwise only the
# non-OK messages are printed, most severe first, joined by $linebreak.
# Both modes count the printed messages per level in
# %nagios_alert_count.
if ($opt{debug}) {
    print " System: $sysinfo{model}\n";
    print " ServiceTag: $sysinfo{serial}\n";
    print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}\n";
    if ($#report_storage >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Storage Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+----------+--------------------------------------------------------\n";
        foreach my $entry (@report_storage) {
            my ($msg, $level, $nexus) = @{ $entry };
            # State and ID are right-aligned in 8 character columns
            printf "%8s | %8s | %s\n", $reverse_exitcode{$level}, $nexus, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if ($#report_chassis >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Chassis Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+------+------------------------------------------------------------\n";
        foreach my $entry (@report_chassis) {
            my ($msg, $level, $nexus) = @{ $entry };
            # State right-aligned in 8 characters, ID in 4
            printf "%8s | %4s | %s\n", $reverse_exitcode{$level}, $nexus, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if ($#report_other >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Other messages \n";
        print "=============================================================================\n";
        print " STATE | MESSAGE TEXT \n";
        print "---------+-------------------------------------------------------------------\n";
        foreach my $entry (@report_other) {
            my ($msg, $level) = @{ $entry };
            printf "%8s | %s\n", $reverse_exitcode{$level}, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
}
else {
    my $c = 0;  # counter to determine linebreaks

    # Run through each message, sorted by severity level (highest
    # level first). The comparator uses <=> so that it returns a
    # proper negative/zero/positive value, as sort requires.
  ALERT:
    foreach my $entry (sort { $b->[1] <=> $a->[1] }
                       (@report_storage, @report_chassis, @report_other)) {
        my ($msg, $level) = @{ $entry };
        next ALERT if $level == $E_OK;

        if (defined $opt{only}) {
            # If user wants only critical alerts
            next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);

            # If user wants only warning alerts
            next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
        }

        # Prefix with service tag if specified with option '-i|--info'
        if ($opt{info}) {
            if (defined $opt{htmlinfo}) {
                $msg = '[<a href="' . warranty_url($sysinfo{serial})
                  . "\">$sysinfo{serial}</a>] " . $msg;
            }
            else {
                $msg = "[$sysinfo{serial}] " . $msg;
            }
        }

        # Prefix with nagios level if specified with option '--state'
        $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

        # Prefix with one-letter nagios level if specified with option '--short-state'
        $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

        # Every message except the first is preceded by a linebreak
        print $linebreak if $c++ > 0;
        print $msg;

        $nagios_alert_count{$reverse_exitcode{$level}}++;
    }
}
3868
# Determine our exit code. CRITICAL takes precedence over WARNING,
# which takes precedence over UNKNOWN.
$exit_code = $nagios_alert_count{'CRITICAL'} > 0 ? $E_CRITICAL
           : $nagios_alert_count{'WARNING'}  > 0 ? $E_WARNING
           : $nagios_alert_count{'UNKNOWN'}  > 0 ? $E_UNKNOWN
           :                                       $E_OK;

# Global status via SNMP.. extra safety check: the global status is
# not OK although no component check complained
if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
    print "OOPS! Something is wrong with this server, but I don't know what. ",
      "The global system health status is $reverse_exitcode{$globalstatus}, ",
      "but every component check is OK. This may be a bug in the Nagios plugin, ",
      "please file a bug report.\n";
    exit $E_UNKNOWN;
}
3883
# Print OK message
#
# Three cases:
#   1. Exit code is OK, $opt{only} names a single check (anything but
#      'critical', 'warning' or 'chassis') and debug is off: print the
#      one-line summary for that check.
#   2. Exit code is OK and debug is off: print the general system
#      summary, extended according to the $opt{okinfo} level.
#   3. Otherwise (exit code not OK, or debug on): optionally print the
#      extended system info line and the user supplied post message.
if (   $exit_code == $E_OK
    && defined $opt{only}
    # The alternation is grouped so that \A and \z anchor every
    # alternative, not just the first and the last one.
    && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms
    && !$opt{debug})
{
    # OK summary for each supported value of $opt{only}. Checks that
    # may legitimately find nothing to probe get a dedicated message.
    my %okmsg = (
        'storage'   => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
        'fans'      => $count{fan} == 0 && $blade
                         ? 'OK - blade system with no fan probes'
                         : "FANS OK - $count{fan} fan probes checked",
        'temp'      => "TEMPERATURES OK - $count{temp} temperature probes checked",
        'memory'    => "MEMORY OK - $count{dimm} memory modules checked",
        'power'     => $count{power} == 0
                         ? 'OK - no instrumented power supplies found'
                         : "POWER OK - $count{power} power supplies checked",
        'cpu'       => "PROCESSORS OK - $count{cpu} processors checked",
        'voltage'   => "VOLTAGE OK - $count{volt} voltage probes checked",
        'batteries' => $count{bat} == 0
                         ? 'OK - no batteries found'
                         : "BATTERIES OK - $count{bat} batteries checked",
        'amperage'  => $count{amp} == 0
                         ? 'OK - no power monitoring probes found'
                         : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
        'intrusion' => $count{intr} == 0
                         ? 'OK - no intrusion detection probes found'
                         : "INTRUSION OK - $count{intr} intrusion detection probes checked",
        'alertlog'  => $snmp
                         ? 'OK - not supported via snmp'
                         : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
        'esmlog'    => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
        'esmhealth' => "ESM LOG OK - less than 80% used",
    );

    print $okmsg{$opt{only}};
}
elsif ($exit_code == $E_OK && !$opt{debug}) {
    # First line: model and serial number, optionally as HTML links
    if (defined $opt{htmlinfo}) {
        printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
          documentation_url($sysinfo{model}), $sysinfo{model},
            warranty_url($sysinfo{serial}), $sysinfo{serial};
    }
    else {
        printf q{OK - System: '%s', SN: '%s', hardware working fine},
          $sysinfo{model}, $sysinfo{serial};
    }

    # Storage summary on the same line
    if ($check{storage}) {
        printf ', %d logical drives, %d physical drives',
          $count{vdisk}, $count{pdisk};
    }
    else {
        print ', not checking storage';
    }

    # okinfo level 1: BIOS, plus RAC and BMC firmware when present
    if ($opt{okinfo} >= 1) {
        print $linebreak;
        printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};

        if ($sysinfo{rac}) {
            printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
        }
        if ($sysinfo{bmc}) {
            printf q{, BMC='%s'}, $sysinfo{bmc_fw};
        }
    }

    # okinfo level 2: one line per storage controller and enclosure
    if ($opt{okinfo} >= 2) {
        if ($check{storage}) {
            my @storageprint = ();
            foreach my $id (sort keys %{ $sysinfo{controller} }) {
                chomp $sysinfo{controller}{$id}{driver};
                push @storageprint, sprintf q{----- CTRL %s (%s): FW='%s', DR='%s'},
                  $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
                    $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
            }
            foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
                push @storageprint, sprintf q{----- ENCL %s (%s): FW='%s'},
                  $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
                    $sysinfo{enclosure}{$id}->{firmware};
            }

            # print stuff
            foreach my $line (@storageprint) {
                print $linebreak, $line;
            }
        }
    }

    # okinfo level 3: OMSA version
    if ($opt{okinfo} >= 3) {
        print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
    }

}
else {
    # Extended info: system model and serial number on a separate line
    if ($opt{extinfo}) {
        print $linebreak;
        if (defined $opt{htmlinfo}) {
            printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
              documentation_url($sysinfo{model}), $sysinfo{model},
                warranty_url($sysinfo{serial}), $sysinfo{serial};
        }
        else {
            printf '------ SYSTEM: %s, SN: %s',
              $sysinfo{model}, $sysinfo{serial};
        }
    }

    # Post message: either the first line of a file, or the option
    # value itself when it does not name a plain file.
    if (defined $opt{postmsg}) {
        my $post = undef;
        if (-f $opt{postmsg}) {
            open my $POST, '<', $opt{postmsg}
              or do {
                  my $errmsg = $!;  # save before print can clobber $!
                  print $linebreak;
                  print "ERROR: Couldn't open post message file $opt{postmsg}: $errmsg\n";
                  exit $E_UNKNOWN;
              };
            $post = <$POST>;
            close $POST;

            # An empty file yields undef; nothing to chomp or print then
            chomp $post if defined $post;
        }
        else {
            $post = $opt{postmsg};
        }
        if (defined $post) {
            print $linebreak;

            # Expand the %-sequences in a single pass, so that '%%'
            # protects a following letter ('%%s' gives a literal '%s')
            # and inserted values are never expanded a second time.
            # Unknown sequences are left untouched.
            my %expansion = (
                's'  => $sysinfo{serial},
                'm'  => $sysinfo{model},
                'b'  => $sysinfo{bios},
                'd'  => $sysinfo{biosdate},
                'o'  => $sysinfo{osname},
                'r'  => $sysinfo{osver},
                'p'  => $count{pdisk},
                'l'  => $count{vdisk},
                'n'  => $linebreak,
                q{%} => q{%},
            );
            $post =~ s{[%]([smbdorpln%])}{$expansion{$1}}gxms;
            print $post;
        }
    }
}
4005
# Print performance data
#
# Done only when $opt{perfdata} is defined, debug is off and %perfdata
# is non-empty. The output is a '|' followed by 'label'=value entries,
# separated by a newline when $opt{perfdata} is 'multiline' and by a
# single space otherwise.
if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
    my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ };  # line break for perfdata
    print q{|};

    # Sort comparator for performance data labels (uses sort's $a/$b).
    # Labels are ordered by the rank of their prefix, i.e. the part
    # before the first underscore, and then alphabetically by label.
    # A label with an unknown or empty prefix gets rank -1, so it
    # sorts first without triggering "uninitialized value" warnings.
    # Note that a named sub is compiled once at compile time, even
    # though it is written inside this conditional block.
    sub perfdata {
        my %order
          = (
             fan       => 0,
             pwr       => 1,
             temp      => 2,
             enclosure => 3,
            );
        my ($rank_a, $rank_b) = map {
            my $prefix = (split /_/, $_, 2)[0];
            defined $prefix && exists $order{$prefix} ? $order{$prefix} : -1
        } $a, $b;

        # Ranks are numbers, so compare them numerically
        return ($rank_a <=> $rank_b) || ($a cmp $b);
    }

    print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
}
e133d101 4024
# Terminate the output with a newline, except in debug mode
unless ($opt{debug}) {
    print "\n";
}

# Leave with the exit code accumulated in $exit_code
exit $exit_code;