5 # Monitor Dell server hardware status using Dell OpenManage Server
6 # Administrator, either locally via NRPE, or remotely via SNMP.
10 # Copyright (C) 2009 Trond H. Amundsen
12 # This program is free software: you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful, but
18 # WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 require 5.006; # Perl v5.6.0 or newer is required
29 use POSIX qw(isatty ceil);
30 use Getopt::Long qw(:config no_ignore_case);
32 # Global (package) variables used throughout the code
33 use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
34 $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
35 $snmp_session $snmp_error $omreport $globalstatus $global
36 $linebreak $omopt_chassis $omopt_system $blade
38 %check %opt %perfdata %reverse_exitcode %status2nagios
39 %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
40 %blacklist %nagios_alert_count %count
41 @controllers @enclosures
42 @report_storage @report_chassis @report_other
45 #---------------------------------------------------------------------
46 # Initialization and global variables
47 #---------------------------------------------------------------------
49 # If we don't have a TTY, the plugin is probably run by Nagios. In
50 # that case, redirect all output to STDERR to STDOUT. Nagios ignores
52 if (! isatty *STDOUT) {
53 open STDERR, '>&', 'STDOUT';
56 # Version and similar info
57 $NAME = 'check_openmanage';
58 $VERSION = '3.5.0-beta8';
59 $AUTHOR = 'Trond H. Amundsen';
60 $CONTACT = 't.h.amundsen@usit.uio.no';
68 # Firmware update lock file [FIXME: location on Windows?]
69 $FW_LOCK = '/var/lock/.spsetup'; # default on Linux
72 $USAGE = <<"END_USAGE";
73 Usage: $NAME [OPTION]...
81 -p, --perfdata Output performance data
82 -t, --timeout Plugin timeout in seconds
83 -c, --critical Customise temperature critical limits
84 -w, --warning Customise temperature warning limits
85 -d, --debug Debug output, reports everything
86 -h, --help Display this help text
87 -V, --version Display version info
91 -H, --hostname Hostname or IP of the server (needed for SNMP)
92 -C, --community SNMP community string
93 -P, --protocol SNMP protocol version
94 --port SNMP port number
98 -i, --info Prefix any alerts with the service tag
99 -e, --extinfo Append system info to alerts
100 -s, --state Prefix alerts with alert state
101 --short-state Prefix alerts with alert state (abbreviated)
102 -o, --okinfo Verbosity when check result is OK
103 --htmlinfo HTML output with clickable links
105 CHECK CONTROL AND BLACKLISTING:
107 -a, --all Check everything, even log content
108 -b, --blacklist Blacklist missing and/or failed components
109 --only Only check a certain component or alert type
110 --check Fine-tune which components are checked
112 For more information and advanced options, see the manual page or URL:
113 http://folk.uio.no/trondham/software/check_openmanage.html
116 # Version and license text
117 $LICENSE = <<"END_LICENSE";
119 Copyright (C) 2009 $AUTHOR
120 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
121 This is free software: you are free to change and redistribute it.
122 There is NO WARRANTY, to the extent permitted by law.
124 Written by $AUTHOR <$CONTACT>
127 # Options with default values
128 %opt = ( 'blacklist' => [],
132 'timeout' => 30, # default timeout is 30 seconds
142 'okinfo' => 0, # default "ok" output level
143 'linebreak' => undef,
147 'port' => 161, # default SNMP port
149 'community' => 'public', # SNMP v1 or v2c
151 'username' => undef, # SNMP v3
152 'authpassword' => undef, # SNMP v3
153 'authkey' => undef, # SNMP v3
154 'authprotocol' => undef, # SNMP v3
155 'privpassword' => undef, # SNMP v3
156 'privkey' => undef, # SNMP v3
157 'privprotocol' => undef, # SNMP v3
161 GetOptions('b|blacklist=s' => \@{ $opt{blacklist} },
162 'check=s' => \@{ $opt{check} },
163 'c|critical=s' => \@{ $opt{critical} },
164 'w|warning=s' => \@{ $opt{warning} },
165 't|timeout=i' => \$opt{timeout},
166 'd|debug' => \$opt{debug},
167 'h|help' => \$opt{help},
168 'V|version' => \$opt{version},
169 'p|perfdata:s' => \$opt{perfdata},
170 'i|info' => \$opt{info},
171 'e|extinfo' => \$opt{extinfo},
172 'htmlinfo:s' => \$opt{htmlinfo},
173 'postmsg=s' => \$opt{postmsg},
174 's|state' => \$opt{state},
175 'short-state' => \$opt{shortstate},
176 'o|ok-info=i' => \$opt{okinfo},
177 'l|linebreak=s' => \$opt{linebreak},
178 'a|all' => \$opt{all},
179 'only=s' => \$opt{only},
180 'port=i' => \$opt{port},
181 'H|hostname=s' => \$opt{hostname},
182 'C|community=s' => \$opt{community},
183 'P|protocol=i' => \$opt{protocol},
184 'U|username=s' => \$opt{username},
185 'authpassword=s' => \$opt{authpassword},
186 'authkey=s' => \$opt{authkey},
187 'authprotocol=s' => \$opt{authprotocol},
188 'privpassword=s' => \$opt{privpassword},
189 'privkey=s' => \$opt{privkey},
190 'privprotocol=s' => \$opt{privprotocol},
191 ) or do { print $USAGE; exit $E_UNKNOWN };
193 # If user requested help
199 # If user requested version info
207 print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
212 # If we're using SNMP
213 $snmp = defined $opt{hostname} ? 1 : 0;
215 # SNMP session variables
216 $snmp_session = undef;
219 # The omreport command
222 # Check flags, override available with the --check option
223 %check = ( 'storage' => 1, # check storage subsystem
224 'memory' => 1, # check memory (dimms)
225 'fans' => 1, # check fan status
226 'power' => 1, # check power supplies
227 'temp' => 1, # check temperature
228 'cpu' => 1, # check processors
229 'voltage' => 1, # check voltage
230 'batteries' => 1, # check battery probes
231 'amperage' => 1, # check power consumption
232 'intrusion' => 1, # check intrusion detection
233 'alertlog' => 0, # check the alert log
234 'esmlog' => 0, # check the ESM log (hardware log)
235 'esmhealth' => 1, # check the ESM log overall health
239 $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
241 # Line break from option
242 if (defined $opt{linebreak}) {
243 if ($opt{linebreak} eq 'REG') {
246 elsif ($opt{linebreak} eq 'HTML') {
247 $linebreak = '<br/>';
250 $linebreak = $opt{linebreak};
254 # Exit with status=UNKNOWN if there is firmware upgrade in progress
255 if (!$snmp && -f $FW_LOCK) {
256 print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
260 # List of controllers and enclosures
261 @controllers = (); # controllers
262 @enclosures = (); # enclosures
265 @report_storage = (); # messages with associated nagios level (storage)
266 @report_chassis = (); # messages with associated nagios level (chassis)
267 @report_other = (); # messages with associated nagios level (other)
269 # Counters for everything
272 'pdisk' => 0, # number of physical disks
273 'vdisk' => 0, # number of logical drives (virtual disks)
274 'temp' => 0, # number of temperature probes
275 'volt' => 0, # number of voltage probes
276 'amp' => 0, # number of amperage probes
277 'intr' => 0, # number of intrusion probes
278 'dimm' => 0, # number of memory modules
279 'fan' => 0, # number of fan probes
280 'cpu' => 0, # number of CPUs
281 'bat' => 0, # number of batteries
282 'power' => 0, # number of power supplies
284 'Critical' => 0, # critical entries in ESM log
285 'Non-Critical' => 0, # warning entries in ESM log
286 'Ok' => 0, # ok entries in ESM log
289 'Critical' => 0, # critical entries in alert log
290 'Non-Critical' => 0, # warning entries in alert log
291 'Ok' => 0, # ok entries in alert log
298 # Global health status
299 $global = 1; # default is to check global status
300 $globalstatus = $E_OK; # default global health status is "OK"
302 # Nagios error levels reversed
306 $E_WARNING => 'WARNING',
307 $E_CRITICAL => 'CRITICAL',
308 $E_UNKNOWN => 'UNKNOWN',
311 # OpenManage (omreport) and SNMP error levels
314 'Unknown' => $E_CRITICAL,
315 'Critical' => $E_CRITICAL,
316 'Non-Critical' => $E_WARNING,
318 'Non-Recoverable' => $E_CRITICAL,
319 'Other' => $E_CRITICAL,
330 6 => 'Non-Recoverable',
333 # Probe Status via SNMP
336 1 => 'Other', # probe status is not one of the following:
337 2 => 'Unknown', # probe status is unknown (not known or monitored)
338 3 => 'Ok', # probe is reporting a value within the thresholds
339 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold
340 5 => 'criticalUpper', # probe has crossed upper critical threshold
341 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold
342 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold
343 8 => 'criticalLower', # probe has crossed lower critical threshold
344 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold
345 10 => 'failed', # probe is not functional
348 # Probe status translated to Nagios alarm levels
351 'Other' => $E_CRITICAL,
352 'Unknown' => $E_CRITICAL,
354 'nonCriticalUpper' => $E_WARNING,
355 'criticalUpper' => $E_CRITICAL,
356 'nonRecoverableUpper' => $E_CRITICAL,
357 'nonCriticalLower' => $E_WARNING,
358 'criticalLower' => $E_CRITICAL,
359 'nonRecoverableLower' => $E_CRITICAL,
360 'failed' => $E_CRITICAL,
363 # System information gathered
366 'bios' => 'N/A', # BIOS version
367 'biosdate' => 'N/A', # BIOS release date
368 'serial' => 'N/A', # serial number (service tag)
369 'model' => 'N/A', # system model
370 'osname' => 'N/A', # OS name
371 'osver' => 'N/A', # OS version
372 'om' => 'N/A', # OMSA version
373 'bmc' => 0, # HAS baseboard management controller (BMC)
374 'rac' => 0, # HAS remote access controller (RAC)
375 'rac_name' => 'N/A', # remote access controller (RAC)
376 'bmc_fw' => 'N/A', # BMC firmware
377 'rac_fw' => 'N/A', # RAC firmware
380 # Adjust which checks to perform
381 adjust_checks() if defined $opt{check};
383 # Blacklisted components
384 %blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
386 # If blacklisting is in effect, don't check global health status
387 if (scalar keys %blacklist > 0) {
391 # Take into account new hardware and blades
392 $omopt_chassis = 'chassis'; # default "chassis" option to omreport
393 $omopt_system = 'system'; # default "system" option to omreport
394 $blade = 0; # if this is a blade system
396 # Some initializations and checking before we begin
398 snmp_initialize(); # initialize SNMP
399 snmp_check(); # check that SNMP works
400 snmp_detect_blade(); # detect blade via SNMP
403 # Find the omreport binary
405 # Check help output from omreport, see which options are available.
406 # Also detecting blade via omreport.
407 check_omreport_options();
411 #---------------------------------------------------------------------
413 #---------------------------------------------------------------------
416 # Store a message in one of the message arrays
419 my ($type, $msg, $exval, $id) = @_;
420 defined $id or $id = q{};
424 'storage' => \@report_storage,
425 'chassis' => \@report_chassis,
426 'other' => \@report_other,
429 return push @{ $type2array{$type} }, [ $msg, $exval, $id ];
434 # Run command, put resulting output lines in an array and return a
435 # pointer to that array
440 open my $CMD, '-|', $command
441 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN)
445 or do { report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN)
446 and return \@lines };
451 # Run command, put resulting output in a string variable and return it
456 open my $CMD, '-|', $command
457 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) and return };
458 my $rawtext = do { local $/ = undef; <$CMD> }; # slurping
461 # NOTE: We don't check the return value of close() since omreport
462 # does something weird sometimes.
470 sub snmp_initialize {
471 # Legal SNMP v3 protocols. The alternatives are grouped so that the \A and \z anchors apply to each of them, not only to the first and the last one
472 my $snmp_v3_privprotocol = qr{\A (?: des | aes | aes128 | 3des | 3desede | 3desde ) \z}xms; # NOTE(review): '3desede' is presumably the spelling Net::SNMP expects and what '3desde' was meant to be - confirm
473 my $snmp_v3_authprotocol = qr{\A (?: md5 | sha ) \z}xms;
475 # Parameters to Net::SNMP->session()
478 '-port' => $opt{port},
479 '-hostname' => $opt{hostname},
480 '-version' => $opt{protocol},
483 # Parameters for SNMP v3
484 if ($opt{protocol} == 3) {
486 # Username is mandatory
487 if (defined $opt{username}) {
488 $param{'-username'} = $opt{username};
491 print "SNMP ERROR: With SNMPv3 the username must be specified\n";
495 # Authpassword is optional
496 if (defined $opt{authpassword}) {
497 $param{'-authpassword'} = $opt{authpassword};
500 # Authkey is optional
501 if (defined $opt{authkey}) {
502 $param{'-authkey'} = $opt{authkey};
505 # Privpassword is optional
506 if (defined $opt{privpassword}) {
507 $param{'-privpassword'} = $opt{privpassword};
510 # Privkey is optional
511 if (defined $opt{privkey}) {
512 $param{'-privkey'} = $opt{privkey};
515 # Privprotocol is optional
516 if (defined $opt{privprotocol}) {
517 if ($opt{privprotocol} =~ m/$snmp_v3_privprotocol/xms) {
518 $param{'-privprotocol'} = $opt{privprotocol};
521 print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
522 . "must be one of [des|aes|aes128|3des|3desde]\n";
527 # Authprotocol is optional
528 if (defined $opt{authprotocol}) {
529 if ($opt{authprotocol} =~ m/$snmp_v3_authprotocol/xms) {
530 $param{'-authprotocol'} = $opt{authprotocol};
533 print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
534 . "must be one of [md5|sha]\n";
539 # Parameters for SNMP v2c or v1
540 elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
541 $param{'-community'} = $opt{community};
544 print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
548 # Try to initialize the SNMP session
549 if ( eval { require Net::SNMP; 1 } ) {
550 ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
551 if (!defined $snmp_session) {
552 printf "SNMP: %s\n", $snmp_error;
557 print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
564 # Checking if SNMP works by probing for "chassisModelName", which all
565 # servers should have
568 my $chassisModelName = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';
569 my $result = $snmp_session->get_request(-varbindlist => [$chassisModelName]);
571 # Typically if remote host isn't responding
572 if (!defined $result) {
573 printf "SNMP CRITICAL: %s\n", $snmp_session->error;
577 # If OpenManage isn't installed or is not working
578 if ($result->{$chassisModelName} =~ m{\A noSuch (Instance|Object) \z}xms) {
579 print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
586 # Detecting blade via SNMP
588 sub snmp_detect_blade {
589 my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
590 my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);
592 # Identify blade. Older models (4th and 5th gen models) and/or old
593 # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
594 # similar, we assume that this isn't a blade
595 if ($result->{$DellBaseBoardType} eq '3') {
602 # Locate the omreport binary
605 # Possible full paths for omreport
608 '/usr/bin/omreport', # default on Linux
609 '/opt/dell/srvadmin/oma/bin/omreport.sh', # alternate on Linux
610 '/opt/dell/srvadmin/oma/bin/omreport', # alternate on Linux
611 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # default on Windows
612 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # default on Windows x64
615 # Find the one to use
617 foreach my $bin (@omreport_paths) {
624 # Exit with status=UNKNOWN if OM is not installed, or we don't
625 # have permission to execute the binary
626 if (!defined $omreport) {
627 print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
634 # Checks output from 'omreport -?' and searches for arguments to
635 # omreport, to accommodate deprecated options "chassis" and "system"
636 # (on newer hardware), as well as blade servers.
638 sub check_omreport_options {
639 foreach (@{ run_command("$omreport -? 2>&1") }) {
640 if (m/\A servermodule /xms) {
641 # If "servermodule" argument to omreport exists, use it
642 # instead of argument "system"
643 $omopt_system = 'servermodule';
645 elsif (m/\A mainsystem /xms) {
646 # If "mainsystem" argument to omreport exists, use it
647 # instead of argument "chassis"
648 $omopt_chassis = 'mainsystem';
650 elsif (m/\A modularenclosure /xms) {
651 # If "modularenclosure" argument to omreport exists, assume
652 # that this is a blade
660 # Read the blacklist option and return a hash containing the
661 # blacklisted components
667 if (scalar @{ $opt{blacklist} } >= 0) {
668 foreach my $black (@{ $opt{blacklist} }) {
671 open my $BL, '<', $black
672 or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN)
685 return {} if $#bl < 0;
687 # Parse blacklist string, put in hash
688 foreach my $black (@bl) {
689 my @comps = split m{/}xms, $black;
690 foreach my $c (@comps) {
691 next if $c !~ m/=/xms;
692 my ($key, $val) = split /=/xms, $c;
693 my @vals = split /,/xms, $val;
694 $blacklist{$key} = \@vals;
702 # Read the check option and adjust the hash %check, which is a rough
703 # list of components to be checked
708 # Adjust checking based on the '--all' option. '--only' may be combined with '--all' only when its value is exactly "critical" or "warning"
711 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical | warning ) \z}xms) { # grouped so both anchors apply to both words
712 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
715 if (scalar @{ $opt{check} } > 0) {
716 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
720 # set the check hash to check everything
721 map { $_ = 1 } values %check;
726 # Adjust checking based on the '--only' option. The values "critical" and "warning" are not component keywords and do not enter this branch
727 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical | warning ) \z}xms) { # grouped so both anchors apply to both words
729 if (scalar @{ $opt{check} } > 0) {
730 print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
733 if (! exists $check{$opt{only}} and $opt{only} ne 'chassis') {
734 print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
738 # reset the check hash
739 map { $_ = 0 } values %check;
741 # adjust the check hash
742 if ($opt{only} eq 'chassis') {
743 map { $check{$_} = 1 } qw(memory fans power temp cpu voltage
744 batteries amperage intrusion esmhealth);
747 $check{$opt{only}} = 1;
753 # Adjust checking based on the '--check' option
754 if (scalar @{ $opt{check} } >= 0) {
755 foreach my $check (@{ $opt{check} }) {
758 open my $CL, '<', $check
759 or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN) and return };
772 # Parse checklist string, put in hash
773 foreach my $check (@cl) {
774 my @checks = split /,/xms, $check;
775 foreach my $c (@checks) {
776 next if $c !~ m/=/xms;
777 my ($key, $val) = split /=/xms, $c;
782 # Check if we should check global health status
784 foreach (keys %check) {
785 next CHECK_KEY if $_ eq 'esmlog'; # not part of global status
786 next CHECK_KEY if $_ eq 'alertlog'; # not part of global status
788 if ($check{$_} == 0) { # found something with checking turned off
798 # Runs omreport and returns an array of anonymous hashes containing
800 # Takes one argument: string containing parameters to omreport
807 # Errors that are OK. Some low-end poweredge (and blades) models
808 # don't have RAID controllers, intrusion detection sensor, or
809 # redundant/instrumented power supplies etc.
812 Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem # No intrusion probe
813 | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
814 | No\scontrollers\sfound # No RAID controller
815 | No\sbattery\sprobes\sfound\son\sthis\ssystem # No battery probes
816 | Invalid\scommand:\spwrmonitoring # Older OMSAs lack this command(?)
819 # Errors that are OK on blade servers
822 No\sfan\sprobes\sfound\son\sthis\ssystem # No fan probes
825 # Run omreport and fetch output
826 my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
827 return [] if !defined $rawtext;
829 # Workaround for Openmanage BUG introduced in OMSA 5.5.0
830 $rawtext =~ s/\n;/;/gxms if $command eq 'storage controller';
832 # Parse output, store in array
833 for ((split /\n/xms, $rawtext)) {
834 if (m/\A Error/xms) {
835 next if m{$ok_errors}xms;
836 next if ($blade and m{$ok_blade_errors}xms);
837 report('other', "Problem running 'omreport $command': $_", $E_UNKNOWN);
840 next if !m/(.*?;){2}/xms; # ignore lines with less than 3 fields
841 my @vals = split /;/xms;
842 if ($vals[0] =~ m/\A (Index|ID|Severity) \z/xms) {
847 push @output, { map { $_ => $vals[$i++] } @keys };
852 # Finally, return the collected information
858 # Checks if a component is blacklisted. Returns 1 if the component is
859 # blacklisted, 0 otherwise. Takes two arguments:
860 # arg1: component name
861 # arg2: component id or index
864 my $name = shift; # component name
865 my $id = shift; # component id
866 my $ret = 0; # return value
868 if (defined $blacklist{$name}) {
869 foreach my $comp (@{ $blacklist{$name} }) {
870 if (defined $id and $comp eq $id) {
879 # Converts the NexusID from SNMP to our version
882 $nexus =~ s{\A \\}{}xms;
883 $nexus =~ s{\\}{:}gxms;
887 # Sets custom temperature thresholds based on user supplied options
888 sub custom_temperature_thresholds {
889 my $type = shift; # type of threshold, either w (warning) or c (critical)
890 my %thres = (); # will contain the thresholds
891 my @limits = (); # holds the input
893 my @opt = $type eq 'w' ? @{ $opt{warning} } : @{ $opt{critical} };
895 if (scalar @opt >= 0) {
896 foreach my $t (@opt) {
900 or do { report('other', "Couldn't open temperature threshold file $t: $!",
901 $E_UNKNOWN) and return {} };
912 # Parse threshold strings (comma-separated "key=max" or "key=max/min" items), put in hash
913 foreach my $th (@limits) {
914 my @tmp = split m{,}xms, $th;
915 foreach my $t (@tmp) {
916 next if $t !~ m{=}xms;
917 my ($key, $val) = split m{=}xms, $t;
918 if ($val =~ m{/}xms) {
919 my ($max, $min) = split m{/}xms, $val;
920 $thres{$key}{max} = $max;
921 $thres{$key}{min} = $min;
924 $thres{$key}{max} = $val;
933 # Gets the output from SNMP result according to the OIDs checked
934 sub get_snmp_output {
935 my ($result,$oidref) = @_;
938 foreach my $oid (keys %{ $result }) {
939 my @dummy = split /\./xms, $oid;
942 my $foo = join q{.}, @dummy;
943 if (exists $oidref->{$foo}) {
944 $output[$id]{$oidref->{$foo}} = $result->{$oid};
951 # Map the controller or other item in-place
953 my ($key, $val, $list) = @_;
955 foreach my $lst (@{ $list }) {
956 if (!exists $lst->{$key}) {
963 # Return the URL for official Dell documentation for a specific
965 sub documentation_url {
968 # create model short form, e.g. "r710"
969 $model =~ s{\A PowerEdge \s (.+?) \z}{lc($1)}exms;
971 # special case for blades (e.g. M600, M710), they have common
973 $model =~ s{\A m\d+ \z}{m}xms;
975 return 'http://support.dell.com/support/edocs/systems/pe' . $model . '/';
978 # Return the URL for warranty information for a server with a given
979 # serial number (servicetag)
983 # Dell support sites for different parts of the world
986 'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
987 'ap' => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
988 'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
991 # warranty URLs for different country codes
995 'at' => $supportsite{emea} . 'de/details?c=at&l=de&ServiceTag=', # Austria
996 'be' => $supportsite{emea} . 'nl/details?c=be&l=nl&ServiceTag=', # Belgium
997 'cz' => $supportsite{emea} . 'cs/details?c=cz&l=cs&ServiceTag=', # Czech Republic
998 'de' => $supportsite{emea} . 'de/details?c=de&l=de&ServiceTag=', # Germany
999 'dk' => $supportsite{emea} . 'da/details?c=dk&l=da&ServiceTag=', # Denmark
1000 'es' => $supportsite{emea} . 'es/details?c=es&l=es&ServiceTag=', # Spain
1001 'fi' => $supportsite{emea} . 'fi/details?c=fi&l=fi&ServiceTag=', # Finland
1002 'fr' => $supportsite{emea} . 'fr/details?c=fr&l=fr&ServiceTag=', # France
1003 'gr' => $supportsite{emea} . 'en/details?c=gr&l=el&ServiceTag=', # Greece
1004 'it' => $supportsite{emea} . 'it/details?c=it&l=it&ServiceTag=', # Italy
1005 'il' => $supportsite{emea} . 'en/details?c=il&l=en&ServiceTag=', # Israel
1006 'me' => $supportsite{emea} . 'en/details?c=me&l=en&ServiceTag=', # Middle East
1007 'no' => $supportsite{emea} . 'no/details?c=no&l=no&ServiceTag=', # Norway
1008 'nl' => $supportsite{emea} . 'nl/details?c=nl&l=nl&ServiceTag=', # The Netherlands
1009 'pl' => $supportsite{emea} . 'pl/details?c=pl&l=pl&ServiceTag=', # Poland
1010 'pt' => $supportsite{emea} . 'en/details?c=pt&l=pt&ServiceTag=', # Portugal
1011 'ru' => $supportsite{emea} . 'ru/details?c=ru&l=ru&ServiceTag=', # Russia
1012 'se' => $supportsite{emea} . 'sv/details?c=se&l=sv&ServiceTag=', # Sweden
1013 'uk' => $supportsite{emea} . 'en/details?c=uk&l=en&ServiceTag=', # United Kingdom
1014 'za' => $supportsite{emea} . 'en/details?c=za&l=en&ServiceTag=', # South Africa
1016 'br' => $supportsite{glob} . 'c=br&l=pt&ServiceTag=', # Brazil
1017 'ca' => $supportsite{glob} . 'c=ca&l=en&ServiceTag=', # Canada
1018 'mx' => $supportsite{glob} . 'c=mx&l=es&ServiceTag=', # Mexico
1019 'us' => $supportsite{glob} . 'c=us&l=en&ServiceTag=', # USA
1021 'au' => $supportsite{ap} . 'c=au&l=en&ServiceTag=', # Australia
1022 'cn' => $supportsite{ap} . 'c=cn&l=zh&ServiceTag=', # China
1023 'in' => $supportsite{ap} . 'c=in&l=en&ServiceTag=', # India
1025 'XX' => $supportsite{glob} . 'ServiceTag=', # default
1028 if (exists $url{$opt{htmlinfo}}) {
1029 return $url{$opt{htmlinfo}} . $tag;
1032 return $url{XX} . $tag;
1038 #---------------------------------------------------------------------
1040 #---------------------------------------------------------------------
1042 #-----------------------------------------
1043 # Check global health status
1044 #-----------------------------------------
1050 # Checks global status, i.e. both storage and chassis
1052 my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
1053 my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
1054 if (!defined $result) {
1055 printf "SNMP [systemStateGlobalSystemStatus]: %s\n", $snmp_session->error; # report the error of the failed request; $snmp_error is only assigned when the session is created
1058 $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
1062 # NB! This does not check storage, only chassis...
1064 foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
1066 next if m/\A SEVERITY;COMPONENT/xms;
1067 if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
1068 $health = $status2nagios{$1};
1079 #-----------------------------------------
1080 # STORAGE: Check controllers
1081 #-----------------------------------------
1082 sub check_controllers {
1090 my $firmware = undef;
1097 '1.3.6.1.4.1.674.10893.1.20.130.1.1.1' => 'controllerNumber',
1098 '1.3.6.1.4.1.674.10893.1.20.130.1.1.2' => 'controllerName',
1099 '1.3.6.1.4.1.674.10893.1.20.130.1.1.5' => 'controllerState',
1100 '1.3.6.1.4.1.674.10893.1.20.130.1.1.8' => 'controllerFWVersion',
1101 '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
1102 '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
1103 '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
1104 '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
1105 '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
1108 # We use get_table() here for the odd case where a server has
1109 # two or more controllers, and where some OIDs are missing on
1110 # one of the controllers.
1111 my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
1112 my $result = $snmp_session->get_table(-baseoid => $controllerTable);
1114 # No controllers is OK
1115 return if !defined $result;
1117 @output = @{ get_snmp_output($result, \%ctrl_oid) };
1120 @output = @{ run_omreport('storage controller') };
1134 foreach my $out (@output) {
1136 $id = $out->{'controllerNumber'} - 1;
1137 $name = $out->{'controllerName'};
1138 $state = $ctrl_state{$out->{'controllerState'}};
1139 $status = $snmp_status{$out->{'controllerComponentStatus'}};
1140 $minfw = exists $out->{'controllerMinFWVersion'}
1141 ? $out->{'controllerMinFWVersion'} : undef;
1142 $mindr = exists $out->{'controllerMinDriverVersion'}
1143 ? $out->{'controllerMinDriverVersion'} : undef;
1144 $firmware = exists $out->{controllerFWVersion}
1145 ? $out->{controllerFWVersion} : 'N/A';
1146 $driver = exists $out->{controllerDriverVersion}
1147 ? $out->{controllerDriverVersion} : 'N/A';
1148 $nexus = convert_nexus($out->{controllerNexusID});
1152 $name = $out->{Name};
1153 $state = $out->{State};
1154 $status = $out->{Status};
1155 $minfw = $out->{'Minimum Required Firmware Version'} ne 'Not Applicable'
1156 ? $out->{'Minimum Required Firmware Version'} : undef;
1157 $mindr = $out->{'Minimum Required Driver Version'} ne 'Not Applicable'
1158 ? $out->{'Minimum Required Driver Version'} : undef;
1159 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
1160 ? $out->{'Firmware Version'} : 'N/A';
1161 $driver = $out->{'Driver Version'} ne 'Not Applicable'
1162 ? $out->{'Driver Version'} : 'N/A';
1166 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
1167 push @controllers, $id;
1169 # Collecting some storage info
1170 $sysinfo{'controller'}{$id}{'id'} = $nexus;
1171 $sysinfo{'controller'}{$id}{'name'} = $name;
1172 $sysinfo{'controller'}{$id}{'driver'} = $driver;
1173 $sysinfo{'controller'}{$id}{'firmware'} = $firmware;
1175 next CTRL if blacklisted('ctrl', $nexus);
1177 # Special case: old firmware
1178 if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
1180 my $msg = sprintf 'Controller %d (%s): Firmware is out of date (%s)',
1181 $id, $name, $firmware;
1182 report('storage', $msg, $E_WARNING, $nexus);
1184 # Special case: old driver
1185 if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
1187 my $msg = sprintf 'Controller %d (%s): Driver is out of date (%s)',
1188 $id, $name, $driver;
1189 report('storage', $msg, $E_WARNING, $nexus);
1192 if ($status eq 'Ok' or ($status eq 'Non-Critical'
1193 and (defined $minfw or defined $mindr))) {
1194 my $msg = sprintf 'Controller %d (%s) is %s',
1196 report('storage', $msg, $E_OK, $nexus);
1200 my $msg = sprintf 'Controller %d (%s) needs attention: %s',
1202 report('storage', $msg, $status2nagios{$status}, $nexus);
1209 #-----------------------------------------
1210 # STORAGE: Check physical drives
1211 #-----------------------------------------
1212 sub check_physical_disks {
1213 return if $#controllers == -1;
1223 my $vendor = undef; # disk vendor
1224 my $product = undef; # product ID
1225 my $capacity = undef; # disk length (size) in bytes
1231 '1.3.6.1.4.1.674.10893.1.20.130.4.1.1' => 'arrayDiskNumber',
1232 '1.3.6.1.4.1.674.10893.1.20.130.4.1.2' => 'arrayDiskName',
1233 '1.3.6.1.4.1.674.10893.1.20.130.4.1.3' => 'arrayDiskVendor',
1234 '1.3.6.1.4.1.674.10893.1.20.130.4.1.4' => 'arrayDiskState',
1235 '1.3.6.1.4.1.674.10893.1.20.130.4.1.6' => 'arrayDiskProductID',
1236 '1.3.6.1.4.1.674.10893.1.20.130.4.1.9' => 'arrayDiskEnclosureID',
1237 '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
1238 '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
1239 '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
1240 '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
1241 '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
1242 '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
1243 '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
1244 '1.3.6.1.4.1.674.10893.1.20.130.5.1.5' => 'arrayDiskEnclosureConnectionEnclosureNumber',
1245 '1.3.6.1.4.1.674.10893.1.20.130.5.1.7' => 'arrayDiskEnclosureConnectionControllerNumber',
1247 my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);
1249 if (!defined $result) {
1250 printf "SNMP [storage / pdisk]: %s.\n", $snmp_session->error;
1251 $snmp_session->close;
1255 @output = @{ get_snmp_output($result, \%pdisk_oid) };
1258 foreach my $c (@controllers) {
1259 push @output, @{ run_omreport("storage pdisk controller=$c") };
1260 map_item('ctrl', $c, \@output);
1278 28 => 'Diagnostics',
1279 34 => 'Predictive failure',
1280 35 => 'Initializing',
1283 41 => 'Unsupported',
1284 53 => 'Incompatible',
1287 # Check physical disks on each of the controllers
1289 foreach my $out (@output) {
1291 $name = $out->{arrayDiskName};
1292 if ($name =~ m{.*\d+:\d+:\d+\z}xms) {
1293 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID},
1294 $out->{arrayDiskTargetID});
1297 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID});
1299 $state = $pdisk_state{$out->{arrayDiskState}};
1300 $status = $snmp_status{$out->{arrayDiskComponentStatus}};
1301 $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
1303 $ctrl = exists $out->{arrayDiskEnclosureConnectionControllerNumber}
1304 ? $out->{arrayDiskEnclosureConnectionControllerNumber} - 1
1306 $nexus = convert_nexus($out->{arrayDiskNexusID});
1307 $vendor = $out->{arrayDiskVendor};
1308 $product = $out->{arrayDiskProductID};
1309 $capacity = $out->{arrayDiskLengthInMB} * 1024**2;
1313 $name = $out->{'Name'};
1314 $state = $out->{'State'};
1315 $status = $out->{'Status'};
1316 $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
1317 $progr = ' [' . $out->{'Progress'} . ']';
1318 $ctrl = $out->{'ctrl'};
1319 $nexus = join q{:}, $out->{ctrl}, $id;
1320 $vendor = $out->{'Vendor ID'};
1321 $product = $out->{'Product ID'};
1322 $capacity = $out->{'Capacity'};
1323 $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
1326 next PDISK if blacklisted('pdisk', $nexus);
1329 $vendor =~ s{\s+\z}{}xms; # remove trailing whitespace
1330 $product =~ s{\s+\z}{}xms; # remove trailing whitespace
1332 # Calculate human readable capacity
1333 $capacity = ceil($capacity / 1000**3) >= 1000
1334 ? sprintf '%.1fTB', ($capacity / 1000**4)
1335 : sprintf '%.0fGB', ($capacity / 1000**3);
1336 $capacity = '450GB' if $capacity eq '449GB'; # quick fix for 450GB disks
1337 $capacity = '146GB' if $capacity eq '147GB'; # quick fix for 146GB disks
1338 $capacity = '300GB' if $capacity eq '299GB'; # quick fix for 300GB disks
1340 # Capitalize only the first letter of the vendor name
1341 $vendor = (substr $vendor, 0, 1) . lc (substr $vendor, 1, length $vendor);
1343 # Remove unnecessary trademark rubbish from vendor name
1344 $vendor =~ s{\(tm\)\z}{}xms;
1346 # Special case: Failure predicted
1347 if ($status eq 'Non-Critical' and $fpred) {
1348 my $msg = sprintf '%s (%s %s, %s) on controller %d needs attention: Failure Predicted',
1349 $name, $vendor, $product, $capacity, $ctrl;
1350 report('storage', $msg, $E_WARNING, $nexus);
1352 # Special case: Rebuilding
1353 elsif ($state eq 'Rebuilding') {
1354 my $msg = sprintf '%s (%s) on controller %d is %s%s',
1355 $name, $capacity, $ctrl, $state, $progr;
1356 report('storage', $msg, $E_WARNING, $nexus);
1359 elsif ($status ne 'Ok') {
1360 my $msg = sprintf '%s (%s %s, %s) on controller %d needs attention: %s',
1361 $name, $vendor, $product, $capacity, $ctrl, $state;
1362 report('storage', $msg, $status2nagios{$status}, $nexus);
1366 my $msg = sprintf '%s (%s) on controller %d is %s',
1367 $name, $capacity, $ctrl, $state;
1368 report('storage', $msg, $E_OK, $nexus);
1375 #-----------------------------------------
1376 # STORAGE: Check logical drives
1377 #-----------------------------------------
1378 sub check_virtual_disks {
1379 return if $#controllers == -1;
1394 '1.3.6.1.4.1.674.10893.1.20.140.1.1.1' => 'virtualDiskNumber',
1395 '1.3.6.1.4.1.674.10893.1.20.140.1.1.2' => 'virtualDiskName',
1396 '1.3.6.1.4.1.674.10893.1.20.140.1.1.3' => 'virtualDiskDeviceName',
1397 '1.3.6.1.4.1.674.10893.1.20.140.1.1.4' => 'virtualDiskState',
1398 '1.3.6.1.4.1.674.10893.1.20.140.1.1.6' => 'virtualDiskLengthInMB',
1399 '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
1400 '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
1401 '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
1403 my $result = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);
1405 # No logical drives is OK
1406 return if !defined $result;
1408 @output = @{ get_snmp_output($result, \%vdisk_oid) };
1411 foreach my $c (@controllers) {
1412 push @output, @{ run_omreport("storage vdisk controller=$c") };
1413 map_item('ctrl', $c, \@output);
1426 16 => 'Regenerating',
1429 32 => 'Reconstructing',
1430 35 => 'Initializing',
1431 36 => 'Background Initialization',
1432 38 => 'Resynching Paused',
1433 52 => 'Permanently Degraded',
1434 54 => 'Degraded Redundancy',
1439 1 => 'Concatenated',
1446 19 => 'Concatenated RAID 1',
1450 # Check virtual disks on each of the controllers
1452 foreach my $out (@output) {
1454 $id = $out->{virtualDiskNumber} - 1;
1455 $dev = $out->{virtualDiskDeviceName};
1456 $state = $vdisk_state{$out->{virtualDiskState}};
1457 $status = $snmp_status{$out->{virtualDiskComponentStatus}};
1458 $layout = $vdisk_layout{$out->{virtualDiskLayout}};
1459 $size = sprintf '%.2f GB', $out->{virtualDiskLengthInMB} / 1024;
1460 $progr = q{}; # can't get this from SNMP(?)
1461 $nexus = convert_nexus($out->{virtualDiskNexusID});
1465 $dev = $out->{'Device Name'};
1466 $state = $out->{State};
1467 $status = $out->{Status};
1468 $layout = $out->{Layout};
1469 $size = $out->{Size};
1470 $progr = ' [' . $out->{Progress} . ']';
1471 $size =~ s{\A (.*GB).* \z}{$1}xms;
1472 $nexus = join q{:}, $out->{ctrl}, $id;
1475 next VDISK if blacklisted('vdisk', $nexus);
1478 # Special case: Regenerating
1479 if ($state eq 'Regenerating') {
1480 my $msg = sprintf 'Logical drive %d "%s" (%s, %s) is %s%s',
1481 $id, $dev, $layout, $size, $state, $progr;
1482 report('storage', $msg, $E_WARNING, $nexus);
1485 elsif ($status ne 'Ok') {
1486 my $msg = sprintf 'Logical drive %d "%s" (%s, %s) needs attention: %s',
1487 $id, $dev, $layout, $size, $state;
1488 report('storage', $msg, $status2nagios{$status}, $nexus);
1492 my $msg = sprintf 'Logical drive %d "%s" (%s, %s) is %s',
1493 $id, $dev, $layout, $size, $state;
1494 report('storage', $msg, $E_OK, $nexus);
1501 #-----------------------------------------
1502 # STORAGE: Check cache batteries
1503 #-----------------------------------------
1504 sub check_cache_battery {
1505 return if $#controllers == -1;
1512 my $learn = undef; # learn state
1513 my $pred = undef; # battery's ability to be charged
1519 '1.3.6.1.4.1.674.10893.1.20.130.15.1.1' => 'batteryNumber',
1520 '1.3.6.1.4.1.674.10893.1.20.130.15.1.2' => 'batteryName',
1521 '1.3.6.1.4.1.674.10893.1.20.130.15.1.4' => 'batteryState',
1522 '1.3.6.1.4.1.674.10893.1.20.130.15.1.6' => 'batteryComponentStatus',
1523 '1.3.6.1.4.1.674.10893.1.20.130.15.1.9' => 'batteryNexusID',
1524 '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
1525 '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
1526 '1.3.6.1.4.1.674.10893.1.20.130.16.1.5' => 'batteryConnectionControllerNumber',
1528 my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
1530 # No cache battery is OK
1531 return if !defined $result;
1533 @output = @{ get_snmp_output($result, \%bat_oid) };
1536 foreach my $c (@controllers) {
1537 push @output, @{ run_omreport("storage battery controller=$c") };
1538 map_item('ctrl', $c, \@output);
1548 7 => 'Reconditioning',
1567 1 => 'Failed', # The battery cannot be charged and needs to be replaced
1568 2 => 'Ready', # The battery can be charged to full capacity
1569 4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
1570 # battery cannot be determined until the Learn cycle is complete
1573 # Check battery on each of the controllers
1575 foreach my $out (@output) {
1577 $id = $out->{batteryNumber} - 1;
1578 $state = $bat_state{$out->{batteryState}};
1579 $status = $snmp_status{$out->{batteryComponentStatus}};
1580 $learn = exists $out->{batteryLearnState}
1581 ? $bat_learn_state{$out->{batteryLearnState}} : undef;
1582 $pred = exists $out->{batteryPredictedCapacity}
1583 ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
1584 $ctrl = $out->{batteryConnectionControllerNumber} - 1;
1585 $nexus = convert_nexus($out->{batteryNexusID});
1589 $state = $out->{'State'};
1590 $status = $out->{'Status'};
1591 $learn = $out->{'Learn State'};
1592 $pred = $out->{'Predicted Capacity Status'};
1593 $ctrl = $out->{'ctrl'};
1594 $nexus = join q{:}, $out->{ctrl}, $id;
1597 next BATTERY if blacklisted('bat', $nexus);
1599 # Special case: Charging
1600 if ($state eq 'Charging') {
1601 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
1602 $id, $ctrl, $state, $pred;
1603 report('storage', $msg, $E_WARNING, $nexus);
1605 # Special case: Learning (battery learns its capacity)
1606 elsif ($state eq 'Learning') {
1607 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
1608 $id, $ctrl, $state, $learn;
1609 report('storage', $msg, $E_WARNING, $nexus);
1611 # Special case: Power Low (first part of recharge cycle)
1612 elsif ($state eq 'Power Low') {
1613 my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
1615 report('storage', $msg, $E_WARNING, $nexus);
1618 elsif ($status ne 'Ok') {
1619 my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
1620 $id, $ctrl, $state, $status;
1621 report('storage', $msg, $status2nagios{$status}, $nexus);
1625 my $msg = sprintf 'Cache battery %d in controller %d is %s',
1627 report('storage', $msg, $E_OK, $nexus);
1634 #-----------------------------------------
1635 # STORAGE: Check connectors (channels)
1636 #-----------------------------------------
1637 sub check_connectors {
1638 return if $#controllers == -1;
1652 '1.3.6.1.4.1.674.10893.1.20.130.2.1.1' => 'channelNumber',
1653 '1.3.6.1.4.1.674.10893.1.20.130.2.1.2' => 'channelName',
1654 '1.3.6.1.4.1.674.10893.1.20.130.2.1.3' => 'channelState',
1655 '1.3.6.1.4.1.674.10893.1.20.130.2.1.8' => 'channelComponentStatus',
1656 '1.3.6.1.4.1.674.10893.1.20.130.2.1.9' => 'channelNexusID',
1657 '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
1659 my $result = $snmp_session->get_entries(-columns => [keys %conn_oid]);
1661 if (!defined $result) {
1662 printf "SNMP [storage / channel]: %s.\n", $snmp_session->error;
1663 $snmp_session->close;
1667 @output = @{ get_snmp_output($result, \%conn_oid) };
1670 foreach my $c (@controllers) {
1671 push @output, @{ run_omreport("storage connector controller=$c") };
1672 map_item('ctrl', $c, \@output);
1690 3 => 'Fibre Channel',
1697 # Check connectors on each of the controllers
1699 foreach my $out (@output) {
1701 $id = $out->{channelNumber} - 1;
1702 $name = $out->{channelName};
1703 $state = $conn_state{$out->{channelState}};
1704 $status = $snmp_status{$out->{channelComponentStatus}};
1705 $type = $conn_bustype{$out->{channelBusType}};
1706 $nexus = convert_nexus($out->{channelNexusID});
1708 $ctrl =~ s{(\d+):\d+}{$1}xms;
1712 $name = $out->{'Name'};
1713 $state = $out->{'State'};
1714 $status = $out->{'Status'};
1715 $type = $out->{'Connector Type'};
1716 $ctrl = $out->{ctrl};
1717 $nexus = join q{:}, $out->{ctrl}, $id;
1720 next CHANNEL if blacklisted('conn', $nexus);
1722 my $msg = sprintf '%s (%s) on controller %d is %s',
1723 $name, $type, $ctrl, $state;
1724 report('storage', $msg, $status2nagios{$status}, $nexus);
1730 #-----------------------------------------
1731 # STORAGE: Check enclosures
1732 #-----------------------------------------
1733 sub check_enclosures {
1739 my $firmware = undef;
1745 '1.3.6.1.4.1.674.10893.1.20.130.3.1.1' => 'enclosureNumber',
1746 '1.3.6.1.4.1.674.10893.1.20.130.3.1.2' => 'enclosureName',
1747 '1.3.6.1.4.1.674.10893.1.20.130.3.1.4' => 'enclosureState',
1748 '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
1749 '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
1750 '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
1751 '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
1753 my $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);
1755 # No enclosures is OK
1756 return if !defined $result;
1758 @output = @{ get_snmp_output($result, \%encl_oid) };
1761 foreach my $c (@controllers) {
1762 push @output, @{ run_omreport("storage enclosure controller=$c") };
1763 map_item('ctrl', $c, \@output);
1778 foreach my $out (@output) {
1780 $id = $out->{'enclosureNumber'} - 1;
1781 $name = $out->{'enclosureName'};
1782 $state = $encl_state{$out->{'enclosureState'}};
1783 $status = $snmp_status{$out->{'enclosureComponentStatus'}};
1784 $firmware = exists $out->{enclosureFirmwareVersion}
1785 ? $out->{enclosureFirmwareVersion} : 'N/A';
1786 $nexus = convert_nexus($out->{enclosureNexusID});
1790 $name = $out->{Name};
1791 $state = $out->{State};
1792 $status = $out->{Status};
1793 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
1794 ? $out->{'Firmware Version'} : 'N/A';
1795 $nexus = join q{:}, $out->{ctrl}, $id;
1798 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
1799 $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace
1801 # store enclosure data for future use
1802 push @enclosures, { 'id' => $id,
1803 'ctrl' => $out->{ctrl},
1806 # Collecting some storage info
1807 $sysinfo{'enclosure'}{$nexus}{'id'} = $nexus;
1808 $sysinfo{'enclosure'}{$nexus}{'name'} = $name;
1809 $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;
1811 next ENCLOSURE if blacklisted('encl', $nexus);
1813 my $msg = sprintf 'Enclosure %s (%s) is %s',
1814 $nexus, $name, $state;
1815 report('storage', $msg, $status2nagios{$status}, $nexus);
1821 #-----------------------------------------
1822 # STORAGE: Check enclosure fans
1823 #-----------------------------------------
1824 sub check_enclosure_fans {
1825 return if $#controllers == -1;
1833 my $encl_id = undef;
1834 my $encl_name = undef;
1840 '1.3.6.1.4.1.674.10893.1.20.130.7.1.1' => 'fanNumber',
1841 '1.3.6.1.4.1.674.10893.1.20.130.7.1.2' => 'fanName',
1842 '1.3.6.1.4.1.674.10893.1.20.130.7.1.4' => 'fanState',
1843 '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
1844 '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
1845 '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
1846 '1.3.6.1.4.1.674.10893.1.20.130.8.1.4' => 'fanConnectionEnclosureName',
1847 '1.3.6.1.4.1.674.10893.1.20.130.8.1.5' => 'fanConnectionEnclosureNumber',
1850 my $result = $snmp_session->get_entries(-columns => [keys %fan_oid]);
1852 # No enclosure fans is OK
1853 return if !defined $result;
1855 @output = @{ get_snmp_output($result, \%fan_oid) };
1858 foreach my $enc (@enclosures) {
1859 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans") };
1860 map_item('ctrl', $enc->{ctrl}, \@output);
1861 map_item('encl_id', $enc->{id}, \@output);
1862 map_item('encl_name', $enc->{name}, \@output);
1877 # Check fans on each of the enclosures
1879 foreach my $out (@output) {
1881 $id = $out->{fanNumber} - 1;
1882 $name = $out->{fanName};
1883 $state = $fan_state{$out->{fanState}};
1884 $status = $snmp_status{$out->{fanComponentStatus}};
1885 $speed = $out->{fanProbeCurrValue};
1886 $encl_id = $out->{fanConnectionEnclosureNumber} - 1;
1887 $encl_name = $out->{fanConnectionEnclosureName};
1888 $nexus = convert_nexus($out->{fanNexusID});
1892 $name = $out->{'Name'};
1893 $state = $out->{'State'};
1894 $status = $out->{'Status'};
1895 $speed = $out->{'Speed'};
1896 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
1897 $encl_name = $out->{encl_name};
1898 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
1901 next FAN if blacklisted('encl_fan', $nexus);
1904 if ($status ne 'Ok') {
1905 my $msg = sprintf '%s in enclosure %s (%s) needs attention: %s',
1906 $name, $encl_id, $encl_name, $state;
1907 report('storage', $msg, $status2nagios{$status}, $nexus);
1911 my $msg = sprintf '%s in enclosure %s (%s) is %s (speed=%s)',
1912 $name, $encl_id, $encl_name, $state, $speed;
1913 report('storage', $msg, $E_OK, $nexus);
1920 #-----------------------------------------
1921 # STORAGE: Check enclosure power supplies
1922 #-----------------------------------------
1923 sub check_enclosure_pwr {
1924 return if $#controllers == -1;
1931 my $encl_id = undef;
1932 my $encl_name = undef;
1938 '1.3.6.1.4.1.674.10893.1.20.130.9.1.1' => 'powerSupplyNumber',
1939 '1.3.6.1.4.1.674.10893.1.20.130.9.1.2' => 'powerSupplyName',
1940 '1.3.6.1.4.1.674.10893.1.20.130.9.1.4' => 'powerSupplyState',
1941 '1.3.6.1.4.1.674.10893.1.20.130.9.1.9' => 'powerSupplyComponentStatus',
1942 '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
1943 '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
1944 '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
1946 my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
1948 # No enclosure power supplies is OK
1949 return if !defined $result;
1951 @output = @{ get_snmp_output($result, \%ps_oid) };
1954 foreach my $enc (@enclosures) {
1955 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies") };
1956 map_item('ctrl', $enc->{ctrl}, \@output);
1957 map_item('encl_id', $enc->{id}, \@output);
1958 map_item('encl_name', $enc->{name}, \@output);
1967 5 => 'Not Installed',
1973 # Check power supplies on each of the enclosures
1975 foreach my $out (@output) {
1977 $id = $out->{powerSupplyNumber};
1978 $name = $out->{powerSupplyName};
1979 $state = $ps_state{$out->{powerSupplyState}};
1980 $status = $snmp_status{$out->{powerSupplyComponentStatus}};
1981 $encl_id = $out->{powerSupplyConnectionEnclosureNumber} - 1;
1982 $encl_name = $out->{powerSupplyConnectionEnclosureName};
1983 $nexus = convert_nexus($out->{powerSupplyNexusID});
1987 $name = $out->{'Name'};
1988 $state = $out->{'State'};
1989 $status = $out->{'Status'};
1990 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
1991 $encl_name = $out->{encl_name};
1992 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
1995 next PS if blacklisted('encl_ps', $nexus);
1998 if ($status ne 'Ok') {
1999 my $msg = sprintf '%s in enclosure %s (%s) needs attention: %s',
2000 $name, $encl_id, $encl_name, $state;
2001 report('storage', $msg, $status2nagios{$status}, $nexus);
2005 my $msg = sprintf '%s in enclosure %s (%s) is %s',
2006 $name, $encl_id, $encl_name, $state;
2007 report('storage', $msg, $E_OK, $nexus);
2014 #-----------------------------------------
2015 # STORAGE: Check enclosure temperatures
2016 #-----------------------------------------
2017 sub check_enclosure_temp {
2018 return if $#controllers == -1;
2025 my $reading = undef;
2027 my $max_warn = undef;
2028 my $max_crit = undef;
2029 my $encl_id = undef;
2030 my $encl_name = undef;
2036 '1.3.6.1.4.1.674.10893.1.20.130.11.1.1' => 'temperatureProbeNumber',
2037 '1.3.6.1.4.1.674.10893.1.20.130.11.1.2' => 'temperatureProbeName',
2038 '1.3.6.1.4.1.674.10893.1.20.130.11.1.4' => 'temperatureProbeState',
2039 '1.3.6.1.4.1.674.10893.1.20.130.11.1.6' => 'temperatureProbeUnit',
2040 '1.3.6.1.4.1.674.10893.1.20.130.11.1.9' => 'temperatureProbeMaxWarning',
2041 '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
2042 '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
2043 '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
2044 '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
2045 '1.3.6.1.4.1.674.10893.1.20.130.12.1.4' => 'temperatureConnectionEnclosureName',
2046 '1.3.6.1.4.1.674.10893.1.20.130.12.1.5' => 'temperatureConnectionEnclosureNumber',
2048 my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);
2050 # No enclosure temperature probes is OK
2051 return if !defined $result;
2053 @output = @{ get_snmp_output($result, \%temp_oid) };
2056 foreach my $enc (@enclosures) {
2057 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
2058 map_item('ctrl', $enc->{ctrl}, \@output);
2059 map_item('encl_id', $enc->{id}, \@output);
2060 map_item('encl_name', $enc->{name}, \@output);
2075 # Check temperature probes on each of the enclosures
2077 foreach my $out (@output) {
2079 $id = $out->{temperatureProbeNumber} - 1;
2080 $name = $out->{temperatureProbeName};
2081 $state = $temp_state{$out->{temperatureProbeState}};
2082 $status = $snmp_status{$out->{temperatureProbeComponentStatus}};
2083 $unit = $out->{temperatureProbeUnit};
2084 $reading = $out->{temperatureProbeCurValue};
2085 $max_warn = $out->{temperatureProbeMaxWarning};
2086 $max_crit = $out->{temperatureProbeMaxCritical};
2087 $encl_id = $out->{temperatureConnectionEnclosureNumber} - 1;
2088 $encl_name = $out->{temperatureConnectionEnclosureName};
2089 $nexus = convert_nexus($out->{temperatureProbeNexusID});
2093 $name = $out->{'Name'};
2094 $state = $out->{'State'};
2095 $status = $out->{'Status'};
2097 $reading = $out->{'Reading'}; $reading =~ s{\s*C}{}xms;
2098 $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms;
2099 $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms;
2100 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2101 $encl_name = $out->{encl_name};
2102 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2105 next TEMP if blacklisted('encl_temp', $nexus);
2108 if ($status ne 'Ok') {
2109 my $msg = sprintf '%s in enclosure %s (%s) is %s at %s (%s max)',
2110 $name, $encl_id, $encl_name, $state, $reading, $max_crit;
2111 report('storage', $msg, $status2nagios{$status}, $nexus);
2115 my $msg = sprintf '%s in enclosure %s (%s): %s (%s max)',
2116 $name, $encl_id, $encl_name, $reading, $max_crit;
2117 report('storage', $msg, $E_OK, $nexus);
2120 # Collect performance data
2121 if (defined $opt{perfdata}) {
2122 $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
2123 my $pkey = "enclosure_${encl_id}_${name}";
2124 my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
2125 $perfdata{$pkey} = $pval;
2132 #-----------------------------------------
2133 # STORAGE: Check enclosure management modules (EMM)
2134 #-----------------------------------------
2135 sub check_enclosure_emms {
2136 return if $#controllers == -1;
2143 my $encl_id = undef;
2144 my $encl_name = undef;
2150 '1.3.6.1.4.1.674.10893.1.20.130.13.1.1' => 'enclosureManagementModuleNumber',
2151 '1.3.6.1.4.1.674.10893.1.20.130.13.1.2' => 'enclosureManagementModuleName',
2152 '1.3.6.1.4.1.674.10893.1.20.130.13.1.4' => 'enclosureManagementModuleState',
2153 '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
2154 '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
2155 '1.3.6.1.4.1.674.10893.1.20.130.14.1.4' => 'enclosureManagementModuleConnectionEnclosureName',
2156 '1.3.6.1.4.1.674.10893.1.20.130.14.1.5' => 'enclosureManagementModuleConnectionEnclosureNumber',
2158 my $result = $snmp_session->get_entries(-columns => [keys %emms_oid]);
2160 # No enclosure EMMs is OK
2161 return if !defined $result;
2163 @output = @{ get_snmp_output($result, \%emms_oid) };
2166 foreach my $enc (@enclosures) {
2167 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms") };
2168 map_item('ctrl', $enc->{ctrl}, \@output);
2169 map_item('encl_id', $enc->{id}, \@output);
2170 map_item('encl_name', $enc->{name}, \@output);
2181 5 => 'Not Installed',
2186 # Check EMMs on each of the enclosures
2188 foreach my $out (@output) {
2190 $id = $out->{enclosureManagementModuleNumber} - 1;
2191 $name = $out->{enclosureManagementModuleName};
2192 $state = $emms_state{$out->{enclosureManagementModuleState}};
2193 $status = $snmp_status{$out->{enclosureManagementModuleComponentStatus}};
2194 $encl_id = $out->{enclosureManagementModuleConnectionEnclosureNumber} - 1;
2195 $encl_name = $out->{enclosureManagementModuleConnectionEnclosureName};
2196 $nexus = convert_nexus($out->{enclosureManagementModuleNexusID});
2200 $name = $out->{'Name'};
2201 $state = $out->{'State'};
2202 $status = $out->{'Status'};
2203 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2204 $encl_name = $out->{encl_name};
2205 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2208 next EMM if blacklisted('encl_emm', $nexus);
2211 if ($status ne 'Ok') {
2212 my $msg = sprintf '%s in enclosure %s (%s) needs attention: %s',
2213 $name, $encl_id, $encl_name, $state;
2214 report('storage', $msg, $status2nagios{$status}, $nexus);
2218 my $msg = sprintf '%s in enclosure %s (%s) is %s',
2219 $name, $encl_id, $encl_name, $state;
2220 report('storage', $msg, $E_OK, $nexus);
2227 #-----------------------------------------
2228 # CHASSIS: Check memory modules
2229 #-----------------------------------------
2233 my $location = undef;
2242 '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1' => 'memoryDeviceIndex',
2243 '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1' => 'memoryDeviceStatus',
2244 '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1' => 'memoryDeviceLocationName',
2245 '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
2246 '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
2248 my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);
2250 if (!defined $result) {
2251 printf "SNMP [memory]: %s.\n", $snmp_session->error;
2252 $snmp_session->close;
2256 @output = @{ get_snmp_output($result, \%dimm_oid) };
2259 @output = @{ run_omreport("$omopt_chassis memory") };
2262 # Note: These values are bit masks, so combination values are
2263 # possible. If value is 0 (zero), memory device has no faults.
2266 1 => 'ECC single bit correction warning rate exceeded',
2267 2 => 'ECC single bit correction failure rate exceeded',
2268 4 => 'ECC multibit fault encountered',
2269 8 => 'ECC single bit correction logging disabled',
2270 16 => 'device disabled because of spare activation',
2274 foreach my $out (@output) {
2275 @failures = (); # Initialize
2277 $index = $out->{memoryDeviceIndex};
2278 $status = $snmp_status{$out->{memoryDeviceStatus}};
2279 $location = $out->{memoryDeviceLocationName};
2280 $size = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
2281 $modes = $out->{memoryDeviceFailureModes};
2283 foreach my $mask (sort keys %failure_mode) {
2284 if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; }
2289 $index = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
2290 $status = $out->{'Status'};
2291 $location = $out->{'Connector Name'};
2292 $size = $out->{'Size'};
2293 if (defined $size) {
2294 $size =~ s{\s\s}{ }gxms;
2296 # Run 'omreport chassis memory index=X' to get the failures
2297 if ($status ne 'Ok' && defined $index) {
2298 foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
2299 if (m/\A Failures; (.+?) \z/xms) {
2300 chop(my $fail = $1);
2301 push @failures, split m{\.}xms, $fail;
2306 $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;
2308 next DIMM if blacklisted('dimm', $index);
2310 # Ignore empty memory slots
2311 next DIMM if !defined $index;
2314 if ($status ne 'Ok') {
2316 if (scalar @failures == 0) {
2317 $msg = sprintf 'Memory module %d (%s, %s) needs attention (%s)',
2318 $index, $location, $size, $status;
2321 $msg = sprintf 'Memory module %d (%s, %s) needs attention: %s',
2322 $index, $location, $size, (join q{, }, @failures);
2325 report('chassis', $msg, $status2nagios{$status}, $index);
2329 my $msg = sprintf 'Memory module %d (%s, %s) is %s',
2330 $index, $location, $size, $status;
2331 report('chassis', $msg, $E_OK, $index);
2338 #-----------------------------------------
2339 # CHASSIS: Check fans
2340 #-----------------------------------------
2344 my $reading = undef;
2345 my $location = undef;
2346 my $max_crit = undef;
2347 my $max_warn = undef;
2353 '1.3.6.1.4.1.674.10892.1.700.12.1.2.1' => 'coolingDeviceIndex',
2354 '1.3.6.1.4.1.674.10892.1.700.12.1.5.1' => 'coolingDeviceStatus',
2355 '1.3.6.1.4.1.674.10892.1.700.12.1.6.1' => 'coolingDeviceReading',
2356 '1.3.6.1.4.1.674.10892.1.700.12.1.8.1' => 'coolingDeviceLocationName',
2357 '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
2358 '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
2360 my $result = $snmp_session->get_entries(-columns => [keys %cool_oid]);
2362 if ($blade && !defined $result) {
2365 elsif (!$blade && !defined $result) {
2366 printf "SNMP [cooling]: %s.\n", $snmp_session->error;
2367 $snmp_session->close;
2371 @output = @{ get_snmp_output($result, \%cool_oid) };
2374 @output = @{ run_omreport("$omopt_chassis fans") };
2378 foreach my $out (@output) {
2380 $index = $out->{coolingDeviceIndex};
2381 $status = $snmp_probestatus{$out->{coolingDeviceStatus}};
2382 $reading = $out->{coolingDeviceReading};
2383 $location = $out->{coolingDeviceLocationName};
2384 $max_crit = exists $out->{coolingDeviceUpperCriticalThreshold}
2385 ? $out->{coolingDeviceUpperCriticalThreshold} : 0;
2386 $max_warn = exists $out->{coolingDeviceUpperNonCriticalThreshold}
2387 ? $out->{coolingDeviceUpperNonCriticalThreshold} : 0;
2390 $index = $out->{'Index'};
2391 $status = $out->{'Status'};
2392 $reading = $out->{'Reading'};
2393 $location = $out->{'Probe Name'};
2394 $max_crit = $out->{'Maximum Failure Threshold'} ne '[N/A]'
2395 ? $out->{'Maximum Failure Threshold'} : 0;
2396 $max_warn = $out->{'Maximum Warning Threshold'} ne '[N/A]'
2397 ? $out->{'Maximum Warning Threshold'} : 0;
2398 $reading =~ s{\A (\d+).* \z}{$1}xms;
2399 $max_warn =~ s{\A (\d+).* \z}{$1}xms;
2400 $max_crit =~ s{\A (\d+).* \z}{$1}xms;
2403 next FAN if blacklisted('fan', $index);
2406 if ($status ne 'Ok') {
2407 my $msg = sprintf 'Chassis fan %d (%s) needs attention: %s',
2408 $index, $location, $status;
2409 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2410 report('chassis', $msg, $err, $index);
2413 my $msg = sprintf 'Chassis fan %d (%s): %s',
2414 $index, $location, $reading;
2415 report('chassis', $msg, $E_OK, $index);
2418 # Collect performance data
2419 if (defined $opt{perfdata}) {
2420 my $pname = lc $location;
2421 $pname =~ s{\s}{_}gxms;
2422 $pname =~ s{proc_}{cpu#}xms;
2423 my $pkey = join q{_}, 'fan', $index, $pname;
2424 my $pval = join q{;}, "${reading}RPM", $max_warn, $max_crit;
2425 $perfdata{$pkey} = $pval;
2432 #-----------------------------------------
2433 # CHASSIS: Check power supplies
2434 #-----------------------------------------
2435 sub check_powersupplies {
2439 my $err_type = undef;
2447 '1.3.6.1.4.1.674.10892.1.600.12.1.2.1' => 'powerSupplyIndex',
2448 '1.3.6.1.4.1.674.10892.1.600.12.1.5.1' => 'powerSupplyStatus',
2449 '1.3.6.1.4.1.674.10892.1.600.12.1.7.1' => 'powerSupplyType',
2450 '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
2451 '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
2453 my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
2455 # No instrumented PSU is OK (blades, low-end servers)
2456 return 0 if !defined $result;
2458 @output = @{ get_snmp_output($result, \%ps_oid) };
2461 @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
2471 6 => 'Uninterruptible Power Supply',
2481 1 => 'Presence detected',
2482 2 => 'Failure detected',
2483 4 => 'Predictive Failure',
2485 16 => 'AC lost or out-of-range',
2486 32 => 'AC out-of-range but present',
2487 64 => 'Configuration error',
2490 my %ps_config_error_type
2492 1 => 'Vendor mismatch',
2493 2 => 'Revision mismatch',
2494 3 => 'Processor missing',
2498 foreach my $out (@output) {
2500 @states = (); # contains states for the PS
2502 $index = $out->{powerSupplyIndex} - 1;
2503 $status = $snmp_status{$out->{powerSupplyStatus}};
2504 $type = $ps_type{$out->{powerSupplyType}};
2505 $err_type = defined $out->{powerSupplyConfigurationErrorType}
2506 ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;
2508 # get the combined state from the StatusReading OID
2509 foreach my $mask (sort keys %ps_state) {
2510 if (($out->{powerSupplySensorState} & $mask) != 0) {
2511 push @states, $ps_state{$mask};
2515 # If configuration error, also include the error type
2516 if (defined $err_type) {
2517 push @states, $err_type;
2520 # Finally, construct the state string
2521 $state = join q{, }, @states;
2524 $index = $out->{'Index'};
2525 $status = $out->{'Status'};
2526 $type = $out->{'Type'};
2527 $state = $out->{'Online Status'};
2530 next PS if blacklisted('ps', $index);
2533 if ($status ne 'Ok') {
2534 my $msg = sprintf 'Power Supply %d (%s) needs attention: %s',
2535 $index, $type, $state;
2536 report('chassis', $msg, $status2nagios{$status}, $index);
2539 my $msg = sprintf 'Power Supply %d (%s): %s',
2540 $index, $type, $state;
2541 report('chassis', $msg, $E_OK, $index);
2548 #-----------------------------------------
2549 # CHASSIS: Check temperatures
2550 #-----------------------------------------
2551 sub check_temperatures {
2554 my $reading = undef;
2555 my $location = undef;
2556 my $max_crit = undef;
2557 my $max_warn = undef;
2558 my $min_warn = undef;
2559 my $min_crit = undef;
2561 my $discrete = undef;
2564 # Getting custom temperature thresholds (user option)
2565 my %warn_threshold = %{ custom_temperature_thresholds('w') };
2566 my %crit_threshold = %{ custom_temperature_thresholds('c') };
2571 '1.3.6.1.4.1.674.10892.1.700.20.1.2.1' => 'temperatureProbeIndex',
2572 '1.3.6.1.4.1.674.10892.1.700.20.1.5.1' => 'temperatureProbeStatus',
2573 '1.3.6.1.4.1.674.10892.1.700.20.1.6.1' => 'temperatureProbeReading',
2574 '1.3.6.1.4.1.674.10892.1.700.20.1.7.1' => 'temperatureProbeType',
2575 '1.3.6.1.4.1.674.10892.1.700.20.1.8.1' => 'temperatureProbeLocationName',
2576 '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
2577 '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
2578 '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
2579 '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
2580 '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
2582 # this didn't work well for some reason
2583 #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);
2585 # Getting values using the table
2586 my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
2587 my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);
2589 if (!defined $result) {
2590 printf "SNMP [temperatures]: %s.\n", $snmp_session->error;
2591 $snmp_session->close;
2595 @output = @{ get_snmp_output($result, \%temp_oid) };
2598 @output = @{ run_omreport("$omopt_chassis temps") };
2603 1 => 'Other', # type is other than following values
2604 2 => 'Unknown', # type is unknown
2605 3 => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
2606 16 => 'Discrete', # type is temperature probe with discrete reading
2610 foreach my $out (@output) {
2612 $index = $out->{temperatureProbeIndex} - 1;
2613 $status = $snmp_probestatus{$out->{temperatureProbeStatus}};
2614 $reading = $out->{temperatureProbeReading} / 10;
2615 $location = $out->{temperatureProbeLocationName};
2616 $max_crit = $out->{temperatureProbeUpperCriticalThreshold} / 10;
2617 $max_warn = $out->{temperatureProbeUpperNonCriticalThreshold} / 10;
2618 $min_crit = exists $out->{temperatureProbeLowerCriticalThreshold}
2619 ? $out->{temperatureProbeLowerCriticalThreshold} / 10 : '[N/A]';
2620 $min_warn = exists $out->{temperatureProbeLowerNonCriticalThreshold}
2621 ? $out->{temperatureProbeLowerNonCriticalThreshold} / 10 : '[N/A]';
2622 $type = $probe_type{$out->{temperatureProbeType}};
2623 $discrete = exists $out->{temperatureProbeDiscreteReading}
2624 ? $out->{temperatureProbeDiscreteReading} : undef;
2627 $index = $out->{'Index'};
2628 $status = $out->{'Status'};
2629 $reading = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
2630 $location = $out->{'Probe Name'};
2631 $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
2632 $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
2633 $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
2634 $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;
2635 $type = $reading =~ m{\A\d+\z}xms ? 'AmbientESM' : 'Discrete';
2636 $discrete = $reading;
2639 next TEMP if blacklisted('temp', $index);
2642 if ($type eq 'Discrete') {
2643 my $msg = sprintf 'Temperature probe %d (%s): is %s',
2644 $index, $location, $discrete;
2645 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2646 report('chassis', $msg, $err, $index);
2649 # First check according to custom thresholds
2650 if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
2651 # Custom critical MAX
2652 my $msg = sprintf 'Temperature Probe %d (%s) reads %d C (custom max=%d)',
2653 $index, $location, $reading, $crit_threshold{$index}{max};
2654 report('chassis', $msg, $E_CRITICAL, $index);
2656 elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
2657 # Custom warning MAX
2658 my $msg = sprintf 'Temperature Probe %d (%s) reads %d C (custom max=%d)',
2659 $index, $location, $reading, $warn_threshold{$index}{max};
2660 report('chassis', $msg, $E_WARNING, $index);
2662 elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
2663 # Custom critical MIN
2664 my $msg = sprintf 'Temperature Probe %d (%s) reads %d C (custom min=%d)',
2665 $index, $location, $reading, $crit_threshold{$index}{min};
2666 report('chassis', $msg, $E_CRITICAL, $index);
2668 elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
2669 # Custom warning MIN
2670 my $msg = sprintf 'Temperature Probe %d (%s) reads %d C (custom min=%d)',
2671 $index, $location, $reading, $warn_threshold{$index}{min};
2672 report('chassis', $msg, $E_WARNING, $index);
2674 elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
2675 my $msg = sprintf 'Temperature Probe %d (%s) is critically high at %d C',
2676 $index, $location, $reading;
2677 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2678 report('chassis', $msg, $err, $index);
2680 elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
2681 my $msg = sprintf 'Temperature Probe %d (%s) is too high at %d C',
2682 $index, $location, $reading;
2683 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2684 report('chassis', $msg, $err, $index);
2686 elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
2687 my $msg = sprintf 'Temperature Probe %d (%s) is critically low at %d C',
2688 $index, $location, $reading;
2689 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2690 report('chassis', $msg, $err, $index);
2692 elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
2693 my $msg = sprintf 'Temperature Probe %d (%s) is too low at %d C',
2694 $index, $location, $reading;
2695 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2696 report('chassis', $msg, $err, $index);
2700 my $msg = sprintf 'Temperature Probe %d (%s) reads %d C (min=%s/%s, max=%s/%s)',
2701 $index, $location, $reading, $min_warn, $min_crit, $max_warn, $max_crit;
2702 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2703 report('chassis', $msg, $err, $index);
2706 # Collect performance data
2707 if (defined $opt{perfdata}) {
2708 my $pname = lc $location;
2709 $pname =~ s{\s}{_}gxms;
2710 $pname =~ s{_temp\z}{}xms;
2711 $pname =~ s{proc_}{cpu#}xms;
2712 my $pkey = join q{_}, 'temp', $index, $pname;
2713 my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
2714 $perfdata{$pkey} = $pval;
2722 #-----------------------------------------
2723 # CHASSIS: Check processors
2724 #-----------------------------------------
2725 sub check_processors {
2729 my $oid_ver = 'new';
2734 # NOTE: For some reason, older models don't have the
2735 # "Processor Device Status" OIDs. We first check the newer
2736 # (preferred) OIDs, and if that doesn't work, check the "old"
2739 my %cpu_oid_new # for newer models
2741 '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
2742 '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
2743 '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
2746 my %cpu_oid_old # for older models
2748 '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
2749 '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
2750 '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
2753 my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_new]);
2755 if (!defined $result) {
2757 $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_old]);
2760 if (!defined $result) {
2761 printf "SNMP [processors]: %s.\n", $snmp_session->error;
2762 $snmp_session->close;
2766 if ($oid_ver eq 'new') {
2767 @output = @{ get_snmp_output($result, \%cpu_oid_new) };
2770 @output = @{ get_snmp_output($result, \%cpu_oid_old) };
2774 @output = @{ run_omreport("$omopt_chassis processors") };
2779 1 => 'Other', # other than following values
2780 2 => 'Unknown', # unknown
2781 3 => 'Enabled', # enabled
2782 4 => 'User Disabled', # disabled by user via BIOS setup
2783 5 => 'BIOS Disabled', # disabled by BIOS (POST error)
2789 1 => 'Internal Error', # Internal Error
2790 2 => 'Thermal Trip', # Thermal Trip
2791 32 => 'Configuration Error', # Configuration Error
2792 128 => 'Present', # Processor Present
2793 256 => 'Disabled', # Processor Disabled
2794 512 => 'Terminator Present', # Terminator Present
2795 1024 => 'Throttled', # Processor Throttled
2800 foreach my $out (@output) {
2802 if ($oid_ver eq 'new') {
2803 my @states = (); # contains states for the CPU
2804 $index = $out->{processorDeviceStatusIndex} - 1;
2805 $status = $snmp_status{$out->{processorDeviceStatusStatus}};
2807 # get the combined state from the StatusReading OID
2808 foreach my $mask (sort keys %cpu_reading) {
2809 if (($out->{processorDeviceStatusReading} & $mask) != 0) {
2810 push @states, $cpu_reading{$mask};
2814 # Finally, create the state string
2815 $state = join q{, }, @states;
2818 $index = $out->{processorDeviceIndex} - 1;
2819 $status = $snmp_status{$out->{processorDeviceStatus}};
2820 $state = $cpu_state{$out->{processorDeviceStatusState}};
2824 $index = $out->{'Index'};
2825 $status = $out->{'Status'};
2826 $state = $out->{'State'};
2829 next CPU if blacklisted('cpu', $index);
2831 # Ignore unoccupied CPU slots (omreport)
2832 next CPU if (defined $out->{'Processor Manufacturer'}
2833 and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
2834 or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');
2836 # Ignore unoccupied CPU slots (snmp)
2837 if ($snmp and exists $out->{processorDeviceStatusReading}
2838 and $out->{processorDeviceStatusReading} == 0) {
2845 if ($status ne 'Ok') {
2846 my $msg = sprintf 'CPU %d needs attention: %s',
2848 report('chassis', $msg, $status2nagios{$status}, $index);
2852 my $msg = sprintf 'CPU %d is %s',
2854 report('chassis', $msg, $E_OK, $index);
2861 #-----------------------------------------
2862 # CHASSIS: Check voltage probes
2863 #-----------------------------------------
2867 my $reading = undef;
2868 my $location = undef;
2874 '1.3.6.1.4.1.674.10892.1.600.20.1.2.1' => 'voltageProbeIndex',
2875 '1.3.6.1.4.1.674.10892.1.600.20.1.5.1' => 'voltageProbeStatus',
2876 '1.3.6.1.4.1.674.10892.1.600.20.1.6.1' => 'voltageProbeReading',
2877 '1.3.6.1.4.1.674.10892.1.600.20.1.8.1' => 'voltageProbeLocationName',
2878 '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
2881 my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
2882 my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);
2884 if (!defined $result) {
2885 printf "SNMP [voltage probes]: %s.\n", $snmp_session->error;
2886 $snmp_session->close;
2890 @output = @{ get_snmp_output($result, \%volt_oid) };
2893 @output = @{ run_omreport("$omopt_chassis volts") };
2896 my %volt_discrete_reading
2903 foreach my $out (@output) {
2905 $index = $out->{voltageProbeIndex} - 1;
2906 $status = $snmp_status{$out->{voltageProbeStatus}};
2907 $reading = exists $out->{voltageProbeReading}
2908 ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
2909 : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
2910 $location = $out->{voltageProbeLocationName};
2913 $index = $out->{'Index'};
2914 $status = $out->{'Status'};
2915 $reading = $out->{'Reading'};
2916 $location = $out->{'Probe Name'};
2919 next VOLT if blacklisted('volt', $index);
2922 my $msg = sprintf 'Voltage sensor %d (%s) is %s',
2923 $index, $location, $reading;
2924 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2925 report('chassis', $msg, $err, $index);
2931 #-----------------------------------------
2932 # CHASSIS: Check batteries
2933 #-----------------------------------------
2934 sub check_batteries {
2937 my $reading = undef;
2938 my $location = undef;
2944 '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
2945 '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
2946 '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
2947 '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
2949 my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
2951 # No batteries is OK
2952 return 0 if !defined $result;
2954 @output = @{ get_snmp_output($result, \%bat_oid) };
2957 @output = @{ run_omreport("$omopt_chassis batteries") };
2962 1 => 'Predictive Failure',
2964 4 => 'Presence Detected',
2968 foreach my $out (@output) {
2970 $index = $out->{batteryIndex} - 1;
2971 $status = $snmp_status{$out->{batteryStatus}};
2972 $reading = $bat_reading{$out->{batteryReading}};
2973 $location = $out->{batteryLocationName};
2976 $index = $out->{'Index'};
2977 $status = $out->{'Status'};
2978 $reading = $out->{'Reading'};
2979 $location = $out->{'Probe Name'};
2982 next BATTERY if blacklisted('bp', $index);
2985 my $msg = sprintf 'Battery probe %d (%s) is %s',
2986 $index, $location, $reading;
2987 report('chassis', $msg, $status2nagios{$status}, $index);
2993 #-----------------------------------------
2994 # CHASSIS: Check amperage probes (power monitoring)
2995 #-----------------------------------------
# Checks the amperage (power monitoring) probes. Probe data comes from
# SNMP (the amperage probe OIDs below) or from "omreport ... pwrmonitoring".
# Each probe is reported via report('chassis', ...), and when the perfdata
# option is set the numeric readings are stored in %perfdata.
2996 sub check_pwrmonitoring {
2999 my $reading = undef;
3000 my $location = undef;
3001 my $max_crit = undef;
3002 my $max_warn = undef;
# SNMP: OID => name mapping for the amperage probe columns that are fetched
3009 '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex',
3010 '1.3.6.1.4.1.674.10892.1.600.30.1.5.1' => 'amperageProbeStatus',
3011 '1.3.6.1.4.1.674.10892.1.600.30.1.6.1' => 'amperageProbeReading',
3012 '1.3.6.1.4.1.674.10892.1.600.30.1.7.1' => 'amperageProbeType',
3013 '1.3.6.1.4.1.674.10892.1.600.30.1.8.1' => 'amperageProbeLocationName',
3014 '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold',
3015 '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold',
3016 '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading',
3018 my $result = $snmp_session->get_entries(-columns => [keys %amp_oid]);
3020 # No pwrmonitoring is OK
3021 return 0 if !defined $result;
3023 @output = @{ get_snmp_output($result, \%amp_oid) };
# omreport: same check, data taken from the "pwrmonitoring" report
3026 @output = @{ run_omreport("$omopt_chassis pwrmonitoring") };
3029 my %amp_type # Amperage probe types
3031 1 => 'amperageProbeTypeIsOther', # other than following values
3032 2 => 'amperageProbeTypeIsUnknown', # unknown
3033 3 => 'amperageProbeTypeIs1Point5Volt', # 1.5 amperage probe
3034 4 => 'amperageProbeTypeIs3Point3volt', # 3.3 amperage probe
3035 5 => 'amperageProbeTypeIs5Volt', # 5 amperage probe
3036 6 => 'amperageProbeTypeIsMinus5Volt', # -5 amperage probe
3037 7 => 'amperageProbeTypeIs12Volt', # 12 amperage probe
3038 8 => 'amperageProbeTypeIsMinus12Volt', # -12 amperage probe
3039 9 => 'amperageProbeTypeIsIO', # I/O probe
3040 10 => 'amperageProbeTypeIsCore', # Core probe
3041 11 => 'amperageProbeTypeIsFLEA', # FLEA (standby) probe
3042 12 => 'amperageProbeTypeIsBattery', # Battery probe
3043 13 => 'amperageProbeTypeIsTerminator', # SCSI Termination probe
3044 14 => 'amperageProbeTypeIs2Point5Volt', # 2.5 amperage probe
3045 15 => 'amperageProbeTypeIsGTL', # GTL (ground termination logic) probe
3046 16 => 'amperageProbeTypeIsDiscrete', # amperage probe with discrete reading
3047 23 => 'amperageProbeTypeIsPowerSupplyAmps', # Power Supply probe with reading in Amps
3048 24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts
3049 25 => 'amperageProbeTypeIsSystemAmps', # System probe with reading in Amps
3050 26 => 'amperageProbeTypeIsSystemWatts', # System probe with reading in Watts
# Unit per probe type name. Probe types without an entry here get the
# unit 'mA' (see the $unit assignment inside the loop).
3061 'amperageProbeTypeIsPowerSupplyAmps' => 'hA', # tenths of Amps
3062 'amperageProbeTypeIsSystemAmps' => 'hA', # tenths of Amps
3063 'amperageProbeTypeIsPowerSupplyWatts' => 'W', # Watts
3064 'amperageProbeTypeIsSystemWatts' => 'W', # Watts
3065 'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit
3069 foreach my $out (@output) {
# SNMP: the reported index is the SNMP index minus one; discrete probes
# take their reading from the discrete-reading lookup instead of the
# numeric reading; missing thresholds default to 0
3071 $index = $out->{amperageProbeIndex} - 1;
3072 $status = $snmp_status{$out->{amperageProbeStatus}};
3073 $reading = $amp_type{$out->{amperageProbeType}} eq 'amperageProbeTypeIsDiscrete'
3074 ? $amp_discrete{$out->{amperageProbeDiscreteReading}}
3075 : $out->{amperageProbeReading};
3076 $location = $out->{amperageProbeLocationName};
3077 $max_crit = exists $out->{amperageProbeUpperCriticalThreshold}
3078 ? $out->{amperageProbeUpperCriticalThreshold} : 0;
3079 $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold}
3080 ? $out->{amperageProbeUpperNonCriticalThreshold} : 0;
3081 $unit = exists $amp_unit{$amp_type{$out->{amperageProbeType}}}
3082 ? $amp_unit{$amp_type{$out->{amperageProbeType}}} : 'mA';
3083 if ($unit eq 'hA') {
# omreport: rows whose Index is not a plain number are skipped; the
# thresholds fall back to 0 when omreport prints '[N/A]'
3091 $index = $out->{'Index'};
3092 next if $index !~ m/^\d+$/x;
3093 $status = $out->{'Status'};
3094 $reading = $out->{'Reading'};
3095 $location = $out->{'Probe Name'};
3096 $max_crit = $out->{'Failure Threshold'} ne '[N/A]'
3097 ? $out->{'Failure Threshold'} : 0;
3098 $max_warn = $out->{'Warning Threshold'} ne '[N/A]'
3099 ? $out->{'Warning Threshold'} : 0;
# Drop the trailing unit word from the reading and the thresholds,
# keeping only the leading numeric part
3100 $reading =~ s{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}{$1}xms;
3102 $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
3103 $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
3106 next AMP if blacklisted('pm', $index);
3107 next AMP if $index !~ m{\A \d+ \z}xms;
# NOTE(review): the format string below has four conversions but five
# arguments are passed, so $status is never printed. Perl 5.22 and newer
# also emit a "Redundant argument in sprintf" warning for this. Either
# drop $status or add a conversion for it -- confirm the intended message.
3110 my $msg = sprintf 'Amperage probe %d (%s) reads %s %s',
3111 $index, $location, $reading, $unit, $status;
3112 report('chassis', $msg, $status2nagios{$status}, $index);
3114 # Collect performance data
3115 if (defined $opt{perfdata}) {
3116 next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
# Perfdata key: "pwr_mon", probe index and the lower-cased location with
# whitespace replaced by underscores; value is "reading+unit;warn;crit"
3117 my $pname = lc $location;
3118 $pname =~ s{\s}{_}gxms;
3119 my $pkey = join q{_}, 'pwr_mon', $index, $pname;
3120 my $pval = join q{;}, "$reading$unit", $max_warn, $max_crit;
3121 $perfdata{$pkey} = $pval;
3125 # Collect EXTRA performance data not found at first run. This is a
# This extra pass only runs with omreport (not SNMP) and only when
# perfdata was requested
3127 if (defined $opt{perfdata} && !$snmp) {
# Scan the perfdata keys collected so far for "pwr_mon_<index>" entries
3133 foreach (keys %perfdata) {
3134 if (m/\A pwr_mon_(\d+)/xms) {
3140 foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
3142 if ($line eq 'Location;Reading') {
# Lines of the form "<location>;<decimal number> <A|W>" carry the extra readings
3150 if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) {
3154 $aname =~ s{\s}{_}gxms;
3156 # don't use an existing index
3157 while (exists $used{$index}) { ++$index; }
# Extra readings are stored with both thresholds set to 0
3159 $perfdata{"pwr_mon_${index}_${aname}"} = "$aval$aunit;0;0";
3169 #-----------------------------------------
3170 # CHASSIS: Check intrusion
3171 #-----------------------------------------
3172 sub check_intrusion {
3175 my $reading = undef;
3181 '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
3182 '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
3183 '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
3185 my $result = $snmp_session->get_entries(-columns => [keys %int_oid]);
3187 # No intrusion is OK
3188 return 0 if !defined $result;
3190 @output = @{ get_snmp_output($result, \%int_oid) };
3193 @output = @{ run_omreport("$omopt_chassis intrusion") };
3198 1 => 'Not Breached', # chassis not breached and no uncleared breaches
3199 2 => 'Breached', # chassis currently breached
3200 3 => 'Breached Prior', # chassis breached prior to boot and has not been cleared
3201 4 => 'Breach Sensor Failure', # intrusion sensor has failed
3205 foreach my $out (@output) {
3207 $index = $out->{intrusionIndex} - 1;
3208 $status = $snmp_status{$out->{intrusionStatus}};
3209 $reading = $int_reading{$out->{intrusionReading}};
3212 $index = $out->{'Index'};
3213 $status = $out->{'Status'};
3214 $reading = $out->{'State'};
3217 next INTRUSION if blacklisted('intr', $index);
3220 if ($status ne 'Ok') {
3221 my $msg = sprintf 'Chassis intrusion %d detected: %s',
3223 report('chassis', $msg, $E_WARNING, $index);
3227 my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)',
3228 $index, $status, $reading;
3229 report('chassis', $msg, $E_OK, $index);
3236 #-----------------------------------------
3237 # CHASSIS: Check alert log
3238 #-----------------------------------------
sub check_alertlog {
    # The alert log is read through omreport only; nothing to do via SNMP
    return if $snmp;

    # Tally every log entry under its severity in the global counter
    for my $entry (@{ run_omreport("$omopt_system alertlog") }) {
        $count{alert}{ $entry->{Severity} }++;
    }

    # The worst severity present in the log decides the exit value
    my $err = $count{alert}{'Critical'}     > 0 ? $E_CRITICAL
            : $count{alert}{'Non-Critical'} > 0 ? $E_WARNING
            :                                     $E_OK;

    # One summary line with the number of entries per severity
    my $msg = sprintf 'Alert log content: %d critical, %d non-critical, %d ok',
      $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'};
    report('other', $msg, $err);

    return;
}
3259 #-----------------------------------------
3260 # CHASSIS: Check ESM log overall health
3261 #-----------------------------------------
sub check_esmlog_health {
    # Overall health of the ESM log: taken from the
    # systemStateEventLogStatus OID (SNMP) or from the "Health" line of
    # "omreport ... esmlog". Exactly one message is reported.
    my $health = 'Ok';

    if ($snmp) {
        my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
        if (!defined $result) {
            my $msg = sprintf 'SNMP ERROR getting systemStateEventLogStatus OID: %s',
              $snmp_session->error;
            report('other', $msg, $E_UNKNOWN);

            # Without a reply there is no health value to evaluate. Stop
            # here instead of dereferencing the undefined result and
            # reporting a bogus fill grade on top of the SNMP error.
            return;
        }
        $health = $snmp_status{$result->{$systemStateEventLogStatus}};
    }
    else {
        foreach my $line (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
            if ($line =~ m/\A Health;(.+) \z/xms) {
                $health = $1;
                $health =~ s{\s+\z}{}xms;    # drop line ending / trailing blanks
                last;
            }
        }
    }

    # If the overall health of the ESM log is other than "Ok", the
    # fill grade of the log is more than 80% and the log should be
    # cleared
    if ($health eq 'Ok') {
        my $msg = sprintf 'ESM log health is OK (less than 80%% full)';
        report('other', $msg, $E_OK);
    }
    elsif ($health eq 'Critical') {
        my $msg = sprintf 'ESM log is 100%% full!';
        report('other', $msg, $status2nagios{$health});
    }
    else {
        my $msg = sprintf 'ESM log is more than 80%% full';
        report('other', $msg, $status2nagios{$health});
    }

    return;
}
3304 #-----------------------------------------
3305 # CHASSIS: Check ESM log
3306 #-----------------------------------------
3313 '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
3315 my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);
3318 return if !defined $result;
3320 @output = @{ get_snmp_output($result, \%esm_oid) };
3321 foreach my $out (@output) {
3322 ++$count{esm}{$snmp_status{$out->{eventLogSeverityStatus}}};
3326 @output = @{ run_omreport("$omopt_system esmlog") };
3327 foreach my $out (@output) {
3328 ++$count{esm}{$out->{Severity}};
3332 # Create error messages and set exit value if appropriate
3334 if ($count{esm}{'Critical'} > 0) { $err = $E_CRITICAL; }
3335 elsif ($count{esm}{'Non-Critical'} > 0) { $err = $E_WARNING; }
3337 my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok',
3338 $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'};
3339 report('other', $msg, $err);
3345 # Handy function for checking all storage components
3348 check_controllers();
3349 check_physical_disks();
3350 check_virtual_disks();
3351 check_cache_battery();
3354 check_enclosure_fans();
3355 check_enclosure_pwr();
3356 check_enclosure_temp();
3357 check_enclosure_emms();
3363 #---------------------------------------------------------------------
3365 #---------------------------------------------------------------------
3368 # Fetch output from 'omreport chassis info', put in sysinfo hash
sub get_omreport_chassis_info {
    # Reads the chassis model and service tag from omreport's
    # semicolon-separated output into $sysinfo{model} and
    # $sysinfo{serial}. Best effort: if omreport cannot be started,
    # %sysinfo is left untouched.
    if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") {
        my @lines = <$INFO>;
        close $INFO;

      LINE:
        foreach my $line (@lines) {
            # Only the model and service tag lines are of interest
            next LINE if $line !~ m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;

            my ($key, $val) = split /;/xms, $line;

            # A line without a value field would otherwise trigger an
            # uninitialized-value warning and store undef in %sysinfo
            next LINE if !defined $val;

            $key =~ s{\s+\z}{}xms;    # remove trailing whitespace
            $val =~ s{\s+\z}{}xms;    # remove trailing whitespace
            if ($key eq 'Chassis Model' or $key eq 'Model') {
                $sysinfo{model} = $val;
            }
            if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') {
                $sysinfo{serial} = $val;
            }
        }
    }
    return;
}
3391 # Fetch output from 'omreport chassis bios', put in sysinfo hash
sub get_omreport_chassis_bios {
    # Reads the BIOS version and release date from omreport's
    # semicolon-separated output into $sysinfo{bios} and
    # $sysinfo{biosdate}. Best effort: if omreport cannot be started,
    # %sysinfo is left untouched.
    if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") {
        my @lines = <$BIOS>;
        close $BIOS;

      LINE:
        foreach my $line (@lines) {
            my ($key, $val) = split /;/xms, $line;

            # Lines without a value field carry nothing to record; using
            # them would warn about and store an undefined value
            next LINE if !defined $val;

            $key =~ s{\s+\z}{}xms;    # remove trailing whitespace
            $val =~ s{\s+\z}{}xms;    # remove trailing whitespace
            $sysinfo{bios}     = $val if $key eq 'Version';
            $sysinfo{biosdate} = $val if $key eq 'Release Date';
        }
    }
    return;
}
3410 # Fetch output from 'omreport system operatingsystem', put in sysinfo hash
sub get_omreport_system_operatingsystem {
    # Reads the operating system name and version from omreport's
    # semicolon-separated output into $sysinfo{osname} and
    # $sysinfo{osver}. Best effort: if omreport cannot be started,
    # %sysinfo is left untouched.
    if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") {
        my @lines = <$VER>;
        close $VER;

      LINE:
        foreach my $line (@lines) {
            my ($key, $val) = split /;/xms, $line;

            # Lines without a value field carry nothing to record; using
            # them would warn about and store an undefined value
            next LINE if !defined $val;

            $key =~ s{\s+\z}{}xms;    # remove trailing whitespace
            $val =~ s{\s+\z}{}xms;    # remove trailing whitespace
            if ($key eq 'Operating System') {
                $sysinfo{osname} = $val;
            }
            elsif ($key eq 'Operating System Version') {
                $sysinfo{osver} = $val;
            }
        }
    }
    return;
}
3433 # Fetch output from 'omreport about', put in sysinfo hash
3435 sub get_omreport_about {
3436 if (open my $OM, '-|', "$omreport about -fmt ssv") {
3440 if (m/\A Version;(.+) \z/xms) {
3450 # Fetch chassis info via SNMP, put in sysinfo hash
sub get_snmp_chassis_info {
    # Entries of interest within the chassis information table
    my $model_oid  = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';     # chassisModelName
    my $serial_oid = '1.3.6.1.4.1.674.10892.1.300.10.1.11.1';    # chassisServiceTagName

    my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
    my $table = $snmp_session->get_table(-baseoid => $chassisInformationTable);

    # Query failed: flag it and leave %sysinfo untouched
    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $table->{$model_oid}) {
        $sysinfo{model} = $table->{$model_oid};
        $sysinfo{model} =~ s{\s+\z}{}xms;    # remove trailing whitespace
    }
    if (exists $table->{$serial_oid}) {
        $sysinfo{serial} = $table->{$serial_oid};
    }

    return;
}
3482 # Fetch BIOS info via SNMP, put in sysinfo hash
sub get_snmp_chassis_bios {
    # Entries of interest within the system BIOS table
    my $date_oid    = '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1';    # systemBIOSReleaseDateName
    my $version_oid = '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1';    # systemBIOSVersionName

    my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
    my $table = $snmp_session->get_table(-baseoid => $systemBIOSTable);

    # Query failed: flag it and leave %sysinfo untouched
    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $table->{$date_oid}) {
        # A value starting with YYYYMMDD is rewritten as MM/DD/YYYY
        $sysinfo{biosdate} = $table->{$date_oid};
        $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
    }
    if (exists $table->{$version_oid}) {
        $sysinfo{bios} = $table->{$version_oid};
    }

    return;
}
3514 # Fetch OS info via SNMP, put in sysinfo hash
sub get_snmp_system_operatingsystem {
    # Which %sysinfo slot each operating system table entry fills
    my %sysinfo_key_for
      = (
         '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'osname',    # operatingSystemOperatingSystemName
         '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'osver',     # operatingSystemOperatingSystemVersionName
        );

    my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
    my $table = $snmp_session->get_table(-baseoid => $operatingSystemTable);

    # Query failed: flag it and leave %sysinfo untouched
    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting OS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    # Copy over whichever of the wanted entries the agent returned
    foreach my $oid (keys %sysinfo_key_for) {
        next if !exists $table->{$oid};
        $sysinfo{ $sysinfo_key_for{$oid} } = $table->{$oid};
    }

    return;
}
3545 # Fetch OMSA version via SNMP, put in sysinfo hash
sub get_snmp_about {
    # systemManagementSoftwareGlobalVersionName
    my $version_oid = '1.3.6.1.4.1.674.10892.1.100.10.0';

    my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
    my $table = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);

    # Query failed: flag it and leave %sysinfo untouched
    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting OMSA info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    # Record the OMSA version when the agent returned it
    if (exists $table->{$version_oid}) {
        $sysinfo{om} = $table->{$version_oid};
    }

    return;
}
3570 # Collects some information about the system
3574 # Get system model and serial number
3575 $snmp ? get_snmp_chassis_info() : get_omreport_chassis_info();
3577 # Get BIOS information. Only if needed
3578 if ( $opt{okinfo} >= 1
3580 or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) ) {
3581 $snmp ? get_snmp_chassis_bios() : get_omreport_chassis_bios();
3584 # Return now if debug
3585 return if $opt{debug};
3587 # Get OS information. Only if needed
3588 if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
3589 $snmp ? get_snmp_system_operatingsystem() : get_omreport_system_operatingsystem();
3592 # Get OMSA information. Only if needed
3593 if ($opt{okinfo} >= 3) {
3594 $snmp ? get_snmp_about() : get_omreport_about();
3601 # Helper function for running omreport when the results are strictly
sub run_omreport_info {
    # Runs "omreport <command> -fmt ssv" and returns a reference to a
    # hash built from its "name;value" output lines.
    my $command = shift;
    my %info    = ();

    # Run omreport and fetch output (stderr included)
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");

    # Parse output line by line
  LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        # Error lines are reported, then handled like any other line
        if ($line =~ m/\A Error/xms) {
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        next LINE if $line !~ m/;/xms;    # ignore lines with less than two fields

        # First field is the name, second the value; further fields are dropped
        my @fields = split m/;/xms, $line;
        $info{$fields[0]} = $fields[1];
    }

    # Finally, return the collected information
    return \%info;
}
3626 # Get various firmware information (BMC, RAC)
3627 sub get_firmware_info {
# Raw data from whichever source is used: a list of table rows for SNMP,
# a key/value hash for omreport.
3628 my @snmp_output = ();
3629 my %nrpe_output = ();
# Column OIDs of the firmware table, mapped to the field names that the
# row hashes are read with further down ($out->{firmwareType} etc.).
3634 '1.3.6.1.4.1.674.10892.1.300.60.1.7.1' => 'firmwareType',
3635 '1.3.6.1.4.1.674.10892.1.300.60.1.8.1' => 'firmwareTypeName',
3636 '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
# Fetch the whole firmware table in one request.
3639 my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
3640 my $result = $snmp_session->get_table(-baseoid => $firmwareTable);
3642 # Some don't have this OID, this is ok
3643 if (!defined $result) {
# NOTE(review): get_snmp_output() presumably turns the flat OID/value
# result into one hash per table row keyed by the names above -- confirm
# in its definition.
3647 @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
# omreport variant: "<chassis> info" parsed into name/value pairs.
3650 %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
# Numeric firmwareType values mapped to symbolic names. Only
# 'baseboardManagementController', 'rac' and 'imc' are acted upon below.
3653 my %fw_type # Firmware types
3655 1 => 'other', # other than following values
3656 2 => 'unknown', # unknown
3657 3 => 'systemBIOS', # System BIOS
3658 4 => 'embeddedSystemManagementController', # Embedded System Management Controller
3659 5 => 'powerSupplyParallelingBoard', # Power Supply Paralleling Board
3660 6 => 'systemBackPlane', # System (Primary) Backplane
3661 7 => 'powerVault2XXSKernel', # PowerVault 2XXS Kernel
3662 8 => 'powerVault2XXSApplication', # PowerVault 2XXS Application
3663 9 => 'frontPanel', # Front Panel Controller
3664 10 => 'baseboardManagementController', # Baseboard Management Controller
3665 11 => 'hotPlugPCI', # Hot Plug PCI Controller
3666 12 => 'sensorData', # Sensor Data Records
3667 13 => 'peripheralBay', # Peripheral Bay Backplane
3668 14 => 'secondaryBackPlane', # Secondary Backplane for ESM 2 systems
3669 15 => 'secondaryBackPlaneESM3And4', # Secondary Backplane for ESM 3 and 4 systems
3670 16 => 'rac', # Remote Access Controller
3671 17 => 'imc' # Integrated Management Controller
# Walk the rows of the SNMP firmware table and record the firmware of
# the BMC and of the remote access controller (RAC or iMC) in %sysinfo:
#   bmc / bmc_fw            - presence flag and firmware version
#   rac / rac_name / rac_fw - presence flag, name without whitespace,
#                             and firmware version
foreach my $out (@snmp_output) {
    # Translate the numeric type into its symbolic name. Rows without
    # a type, or with a type that is not in %fw_type, are skipped
    # instead of being compared as undef.
    next if !defined $out->{firmwareType};
    my $type = $fw_type{ $out->{firmwareType} };
    next if !defined $type;

    if ($type eq 'baseboardManagementController') {
        $sysinfo{'bmc'}    = 1;
        $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
    }
    # The alternation is grouped so that both anchors apply to both
    # names; ungrouped, "\A rac|imc \z" means "starts with rac" or
    # "ends with imc".
    elsif ($type =~ m{\A (?:rac|imc) \z}xms) {
        my $name = $out->{firmwareTypeName};
        $name =~ s/\s//gxms;    # e.g. "DRAC 5" becomes "DRAC5"
        $sysinfo{'rac'}      = 1;
        $sysinfo{'rac_name'} = $name;
        $sysinfo{'rac_fw'}   = $out->{firmwareVersionName};
    }
}
# omreport variant: scan the name/value pairs for the BMC and RAC
# firmware versions and record them in %sysinfo.
3690 foreach my $key (keys %nrpe_output) {
3691 next if !defined $nrpe_output{$key};
# The BMC version is looked up under either of two key spellings.
3692 if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
3693 $sysinfo{'bmc'} = 1;
3694 $sysinfo{'bmc_fw'} = $nrpe_output{$key};
# Matches keys that start with "DRAC" or "iDRAC", optionally followed by
# one digit, and then "Version", e.g. "DRAC5 Version" or "iDRAC6 Version".
3696 elsif ($key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
# NOTE(review): $name is assigned on a line that is not part of this
# listing; presumably it is built from the two captures above -- confirm.
3698 $sysinfo{'rac'} = 1;
3699 $sysinfo{'rac_fw'} = $nrpe_output{$key};
3700 $sysinfo{'rac_name'} = $name;
3710 #=====================================================================
3712 #=====================================================================
3714 # Here we do the actual checking of components
3715 # Check global status if applicable
# The result is kept in $globalstatus and compared with the exit code
# further down as a consistency check.
3717 $globalstatus = check_global();
# Each check routine runs only when its flag in %check is set. The
# routines are called for their side effects; no return value is used.
3720 # Do multiple selected checks
3721 if ($check{storage}) { check_storage(); }
3722 if ($check{memory}) { check_memory(); }
3723 if ($check{fans}) { check_fans(); }
3724 if ($check{power}) { check_powersupplies(); }
3725 if ($check{temp}) { check_temperatures(); }
3726 if ($check{cpu}) { check_processors(); }
3727 if ($check{voltage}) { check_volts(); }
3728 if ($check{batteries}) { check_batteries(); }
3729 if ($check{amperage}) { check_pwrmonitoring(); }
3730 if ($check{intrusion}) { check_intrusion(); }
3731 if ($check{alertlog}) { check_alertlog(); }
3732 if ($check{esmlog}) { check_esmlog(); }
3733 if ($check{esmhealth}) { check_esmlog_health(); }
3736 #---------------------------------------------------------------------
3738 #---------------------------------------------------------------------
3749 # Get system information
# Firmware versions are only printed in the OK summary when
# $opt{okinfo} is 1 or higher, so they are only fetched in that case.
3752 # Get firmware info if requested via option
3753 if ($opt{okinfo} >= 1) {
3754 get_firmware_info();
3757 # Close SNMP session
3759 $snmp_session->close;
# Debug report: a short system header followed by one table per report
# category. A table is only printed when its report list is non-empty.
3764 print " System: $sysinfo{model}\n";
3765 print " ServiceTag: $sysinfo{serial}\n";
3766 print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}\n";
3767 if ($#report_storage >= 0) {
3768 print "-----------------------------------------------------------------------------\n";
3769 print " Storage Components \n";
3770 print "=============================================================================\n";
3771 print " STATE | ID | MESSAGE TEXT \n";
3772 print "---------+----------+--------------------------------------------------------\n";
# Every report entry is an array reference holding message text, Nagios
# level and component id ("nexus"), in that order.
3773 foreach (@report_storage) {
3774 my ($msg, $level, $nexus) = @{$_};
# Right-align the state name and the id in 8 character columns. The
# repetition operator yields an empty string for a count below one, so
# longer values are printed unpadded.
3775 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
3776 . q{ } x (8 - length $nexus) . "$nexus | $msg\n";
# Tally the messages per state name; the exit code is derived from
# these counters.
3777 $nagios_alert_count{$reverse_exitcode{$level}}++;
3780 if ($#report_chassis >= 0) {
3781 print "-----------------------------------------------------------------------------\n";
3782 print " Chassis Components \n";
3783 print "=============================================================================\n";
3784 print " STATE | ID | MESSAGE TEXT \n";
3785 print "---------+------+------------------------------------------------------------\n";
# Same layout as the storage table, but the id column is 4 wide.
3786 foreach (@report_chassis) {
3787 my ($msg, $level, $nexus) = @{$_};
3788 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
3789 . q{ } x (4 - length $nexus) . "$nexus | $msg\n";
3790 $nagios_alert_count{$reverse_exitcode{$level}}++;
3793 if ($#report_other >= 0) {
3794 print "-----------------------------------------------------------------------------\n";
3795 print " Other messages \n";
3796 print "=============================================================================\n";
3797 print " STATE | MESSAGE TEXT \n";
3798 print "---------+-------------------------------------------------------------------\n";
# Messages in this category are printed without an id column.
3799 foreach (@report_other) {
3800 my ($msg, $level, $nexus) = @{$_};
3801 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | $msg\n";
3802 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Print every non-OK message from all three report categories, most
# severe level first, separated by $linebreak, and tally the printed
# messages per state name in %nagios_alert_count.
my $c = 0;    # number of alerts printed so far; decides when to print $linebreak

# Run through each message, sorted by severity level (highest first).
# The comparator has to be a three-way comparison: sort() expects a
# negative, zero or positive result, and "<" only ever yields true or
# false.
ALERT:
foreach (sort { $b->[1] <=> $a->[1] } (@report_storage, @report_chassis, @report_other)) {
    my ($msg, $level) = @{ $_ };
    next ALERT if $level == $E_OK;

    if (defined $opt{only}) {
        # If user wants only critical alerts
        next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);

        # If user wants only warning alerts
        next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
    }

    # Prefix with service tag if specified with option '-i|--info'
    # NOTE(review): the guard line is not part of this listing; it is
    # reconstructed from the comment above -- confirm the option key.
    if ($opt{info}) {
        if (defined $opt{htmlinfo}) {
            $msg = '[<a href="' . warranty_url($sysinfo{serial})
              . "\">$sysinfo{serial}</a>] " . $msg;
        }
        else {
            $msg = "[$sysinfo{serial}] " . $msg;
        }
    }

    # Prefix with nagios level if specified with option '--state'
    $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

    # Prefix with one-letter nagios level if specified with option '--short-state'
    $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

    # Separate the messages with $linebreak, but do not print one
    # before the first message.
    print $linebreak if $c++ > 0;
    print $msg;

    $nagios_alert_count{$reverse_exitcode{$level}}++;
}
3846 # Determine our exit code
# The three assignments are ordered by increasing priority: a later one
# overwrites an earlier one, so CRITICAL wins over WARNING, which wins
# over UNKNOWN.
3848 $exit_code = $E_UNKNOWN if $nagios_alert_count{'UNKNOWN'} > 0;
3849 $exit_code = $E_WARNING if $nagios_alert_count{'WARNING'} > 0;
3850 $exit_code = $E_CRITICAL if $nagios_alert_count{'CRITICAL'} > 0;
# If the global health status is not OK although no component check
# raised an alert, say so instead of reporting OK. The check is skipped
# when the output is restricted with $opt{only}, because alerts may have
# been filtered out on purpose in that case.
3852 # Global status via SNMP.. extra safety check
3853 if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
3854 print "OOPS! Something is wrong with this server, but I don't know what. ";
3855 print "The global system health status is $reverse_exitcode{$globalstatus}, ";
3856 print "but every component check is OK. This may be a bug in the Nagios plugin, ";
3857 print "please file a bug report.\n";
# Everything is OK and a single component class was selected with
# $opt{only}: print the OK message that belongs to that class. The
# values 'critical', 'warning' and 'chassis' do not select a single
# class and are handled by the general OK summary instead.
# The alternation is grouped so that \A and \z apply to all three
# words; ungrouped, the pattern would match "warning" anywhere.
if (   $exit_code == $E_OK
    && defined $opt{only}
    && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms
    && !$opt{debug} ) {
    # One message per component class, keyed by the value of $opt{only}
    my %okmsg
      = ( 'storage'   => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
          'fans'      => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked",
          'temp'      => "TEMPERATURES OK - $count{temp} temperature probes checked",
          'memory'    => "MEMORY OK - $count{dimm} memory modules checked",
          'power'     => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked",
          'cpu'       => "PROCESSORS OK - $count{cpu} processors checked",
          'voltage'   => "VOLTAGE OK - $count{volt} voltage probes checked",
          'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked",
          'amperage'  => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
          'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked",
          'alertlog'  => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
          'esmlog'    => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
          'esmhealth' => "ESM LOG OK - less than 80% used",
        );

    print $okmsg{$opt{only}};
}
# General OK summary: one line naming the system model and serial
# number, optionally followed by extra lines whose amount of detail
# grows with $opt{okinfo}.
3881 elsif ($exit_code == $E_OK && !$opt{debug}) {
# With $opt{htmlinfo} the model and the serial number are wrapped in
# links produced by documentation_url() and warranty_url().
3882 if (defined $opt{htmlinfo}) {
3883 printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
3884 documentation_url($sysinfo{model}), $sysinfo{model},
3885 warranty_url($sysinfo{serial}), $sysinfo{serial};
3888 printf q{OK - System: '%s', SN: '%s', hardware working fine},
3889 $sysinfo{model}, $sysinfo{serial};
# Drive counts are only meaningful when the storage check was run.
3892 if ($check{storage}) {
3893 printf ', %d logical drives, %d physical drives',
3894 $count{vdisk}, $count{pdisk};
3897 print ', not checking storage';
# Level 1: BIOS version and date, plus RAC and BMC firmware if found.
3900 if ($opt{okinfo} >= 1) {
3902 printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};
3904 if ($sysinfo{rac}) {
3905 printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
3907 if ($sysinfo{bmc}) {
3908 printf q{, BMC='%s'}, $sysinfo{bmc_fw};
# Level 2: one line per storage controller (firmware and driver) and
# one per enclosure (firmware), in sorted key order.
3912 if ($opt{okinfo} >= 2) {
3913 if ($check{storage}) {
3914 my @storageprint = ();
3915 foreach my $id (sort keys %{ $sysinfo{controller} }) {
# Remove a trailing newline from the driver string before printing it.
3916 chomp $sysinfo{controller}{$id}{driver};
3917 push @storageprint, sprintf q{----- CTRL %s (%s): FW='%s', DR='%s'},
3918 $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
3919 $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
3921 foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
3922 push @storageprint, sprintf q{----- ENCL %s (%s): FW='%s'},
3923 $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
3924 $sysinfo{enclosure}{$id}->{firmware};
# Each collected line is preceded by $linebreak.
3928 foreach my $line (@storageprint) {
3929 print $linebreak, $line;
# Level 3: the OMSA version.
3934 if ($opt{okinfo} >= 3) {
3935 print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
# Extended info: a line with system model and serial number, with links
# when $opt{htmlinfo} is defined.
3940 if ($opt{extinfo}) {
3942 if (defined $opt{htmlinfo}) {
3943 printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
3944 documentation_url($sysinfo{model}), $sysinfo{model},
3945 warranty_url($sysinfo{serial}), $sysinfo{serial};
3948 printf '------ SYSTEM: %s, SN: %s',
3949 $sysinfo{model}, $sysinfo{serial};
# Post message: $opt{postmsg} is either the name of an existing file
# holding the message, or the message text itself.
3952 if (defined $opt{postmsg}) {
3954 if (-f $opt{postmsg}) {
# If the file cannot be opened, print an error and exit with UNKNOWN.
# The "and" chain relies on print returning true.
3955 open my $POST, '<', $opt{postmsg}
3956 or ( print $linebreak
3957 and print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n"
3958 and exit $E_UNKNOWN );
# Not a file: use the option value as the message text.
3964 $post = $opt{postmsg};
3966 if (defined $post) {
# Expand the placeholders of the post message:
#   %s serial number    %m system model
#   %b BIOS version     %d BIOS date
#   %o OS name          %r OS version
#   %p physical drives  %l logical drives
#   %n line break       %% a literal percent sign
# All placeholders are expanded in a single pass. With one substitution
# per placeholder and "%%" handled last, "%%s" was expanded to "%"
# followed by the serial number, and text inserted by an earlier
# substitution was scanned again by the later ones.
my %post_subst
  = ( 's' => $sysinfo{serial},
      'm' => $sysinfo{model},
      'b' => $sysinfo{bios},
      'd' => $sysinfo{biosdate},
      'o' => $sysinfo{osname},
      'r' => $sysinfo{osver},
      'p' => $count{pdisk},
      'l' => $count{vdisk},
      'n' => $linebreak,
      '%' => q{%},
    );
# A placeholder whose value was never collected expands to nothing
$post =~ s{[%]([smbdorpln%])}{defined $post_subst{$1} ? $post_subst{$1} : q{}}gexms;
# Performance data is printed only when requested, not in debug mode,
# and only when at least one value was collected.
3983 # Print performance data
3984 if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
3985 my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ }; # line break for perfdata
# Sort comparator: labels are ordered by the rank that %order gives to
# the part of the label before the first underscore; labels with equal
# rank are ordered by the full label.
# NOTE(review): the ranks are compared with "cmp", i.e. as strings. If
# %order (not shown here) holds numbers of 10 or more, "<=>" is needed
# to get numeric order -- confirm.
3996 return ($order{(split /_/, $a, 2)[0]} cmp $order{(split /_/, $b, 2)[0]}) || $a cmp $b;
# Emit the values as 'label'=value pairs joined by $lb.
3999 print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
# Terminate the plugin output, except in debug mode.
4001 print "\n" if !$opt{debug};
4003 # Exit with proper exit code