5 # Monitor Dell server hardware status using Dell OpenManage Server
6 # Administrator, either locally via NRPE, or remotely via SNMP.
10 # Copyright (C) 2010 Trond H. Amundsen
12 # This program is free software: you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful, but
18 # WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 require 5.006; # Perl v5.6.0 or newer is required
29 use POSIX qw(isatty ceil);
30 use Getopt::Long qw(:config no_ignore_case);
32 # Global (package) variables used throughout the code
33 use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
34 $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
35 $snmp_session $snmp_error $omreport $globalstatus $global
36 $linebreak $omopt_chassis $omopt_system $blade
37 $exit_code $snmp $original_sigwarn
38 %check %opt %perfdata %reverse_exitcode %status2nagios
39 %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
40 %blacklist %nagios_alert_count %count
41 @perl_warnings @controllers @enclosures
42 @report_storage @report_chassis @report_other
45 #---------------------------------------------------------------------
46 # Initialization and global variables
47 #---------------------------------------------------------------------
49 # Small subroutine to collect any perl warnings during execution
50 sub collect_perl_warning {
51 push @perl_warnings, [@_];
54 # Set the WARN signal to use our collect subroutine above
55 $original_sigwarn = $SIG{__WARN__};
56 $SIG{__WARN__} = \&collect_perl_warning;
58 # Version and similar info
59 $NAME = 'check_openmanage';
60 $VERSION = '3.5.6-beta4';
61 $AUTHOR = 'Trond H. Amundsen';
62 $CONTACT = 't.h.amundsen@usit.uio.no';
70 # Firmware update lock file [FIXME: location on Windows?]
71 $FW_LOCK = '/var/lock/.spsetup'; # default on Linux
74 $USAGE = <<"END_USAGE";
75 Usage: $NAME [OPTION]...
83 -p, --perfdata Output performance data
84 -t, --timeout Plugin timeout in seconds
85 -c, --critical Customise temperature critical limits
86 -w, --warning Customise temperature warning limits
87 -d, --debug Debug output, reports everything
88 -h, --help Display this help text
89 -V, --version Display version info
93 -H, --hostname Hostname or IP of the server (needed for SNMP)
94 -C, --community SNMP community string
95 -P, --protocol SNMP protocol version
96 --port SNMP port number
100 -i, --info Prefix any alerts with the service tag
101 -e, --extinfo Append system info to alerts
102 -s, --state Prefix alerts with alert state
103 -S, --short-state Prefix alerts with alert state (abbreviated)
104 -o, --okinfo Verbosity when check result is OK
105 -I, --htmlinfo HTML output with clickable links
107 CHECK CONTROL AND BLACKLISTING:
109 -a, --all Check everything, even log content
110 -b, --blacklist Blacklist missing and/or failed components
111 --only Only check a certain component or alert type
112 --check Fine-tune which components are checked
114 For more information and advanced options, see the manual page or URL:
115 http://folk.uio.no/trondham/software/check_openmanage.html
118 # Version and license text
119 $LICENSE = <<"END_LICENSE";
121 Copyright (C) 2010 $AUTHOR
122 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
123 This is free software: you are free to change and redistribute it.
124 There is NO WARRANTY, to the extent permitted by law.
126 Written by $AUTHOR <$CONTACT>
129 # Options with default values
# These are the defaults; GetOptions() further down overwrites them with
# whatever was given on the command line.
130 %opt = ( 'blacklist' => [], # filled by -b/--blacklist
134 'timeout' => 30, # default timeout is 30 seconds
144 'okinfo' => 0, # default "ok" output level
145 'linebreak' => undef, # undef: line break is chosen from whether STDOUT is a tty
150 'port' => 161, # default SNMP port
152 'community' => 'public', # SNMP v1 or v2c
154 'username' => undef, # SNMP v3
155 'authpassword' => undef, # SNMP v3
156 'authkey' => undef, # SNMP v3
157 'authprotocol' => undef, # SNMP v3
158 'privpassword' => undef, # SNMP v3
159 'privkey' => undef, # SNMP v3
160 'privprotocol' => undef, # SNMP v3
164 GetOptions('b|blacklist=s' => \@{ $opt{blacklist} },
165 'check=s' => \@{ $opt{check} },
166 'c|critical=s' => \@{ $opt{critical} },
167 'w|warning=s' => \@{ $opt{warning} },
168 't|timeout=i' => \$opt{timeout},
169 'd|debug' => \$opt{debug},
170 'h|help' => \$opt{help},
171 'V|version' => \$opt{version},
172 'p|perfdata:s' => \$opt{perfdata},
173 'i|info' => \$opt{info},
174 'e|extinfo' => \$opt{extinfo},
175 'I|htmlinfo:s' => \$opt{htmlinfo},
176 'postmsg=s' => \$opt{postmsg},
177 's|state' => \$opt{state},
178 'S|short-state' => \$opt{shortstate},
179 'o|ok-info=i' => \$opt{okinfo},
180 'l|linebreak=s' => \$opt{linebreak},
181 'a|all' => \$opt{all},
182 'only=s' => \$opt{only},
183 'omreport=s' => \$opt{omreport},
184 'port=i' => \$opt{port},
185 'H|hostname=s' => \$opt{hostname},
186 'C|community=s' => \$opt{community},
187 'P|protocol=i' => \$opt{protocol},
188 'U|username=s' => \$opt{username},
189 'authpassword=s' => \$opt{authpassword},
190 'authkey=s' => \$opt{authkey},
191 'authprotocol=s' => \$opt{authprotocol},
192 'privpassword=s' => \$opt{privpassword},
193 'privkey=s' => \$opt{privkey},
194 'privprotocol=s' => \$opt{privprotocol},
195 ) or do { print $USAGE; exit $E_UNKNOWN };
197 # If user requested help
203 # If user requested version info
211 print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
216 # If we're using SNMP
217 $snmp = defined $opt{hostname} ? 1 : 0;
219 # SNMP session variables
220 $snmp_session = undef;
223 # The omreport command
226 # Check flags, override available with the --check option
227 %check = ( 'storage' => 1, # check storage subsystem
228 'memory' => 1, # check memory (dimms)
229 'fans' => 1, # check fan status
230 'power' => 1, # check power supplies
231 'temp' => 1, # check temperature
232 'cpu' => 1, # check processors
233 'voltage' => 1, # check voltage
234 'batteries' => 1, # check battery probes
235 'amperage' => 1, # check power consumption
236 'intrusion' => 1, # check intrusion detection
237 'alertlog' => 0, # check the alert log
238 'esmlog' => 0, # check the ESM log (hardware log)
239 'esmhealth' => 1, # check the ESM log overall health
243 $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
245 # Line break from option
246 if (defined $opt{linebreak}) {
247 if ($opt{linebreak} eq 'REG') {
250 elsif ($opt{linebreak} eq 'HTML') {
251 $linebreak = '<br/>';
254 $linebreak = $opt{linebreak};
258 # Exit with status=UNKNOWN if there is firmware upgrade in progress
259 if (!$snmp && -f $FW_LOCK) {
260 print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
264 # List of controllers and enclosures
265 @controllers = (); # controllers
266 @enclosures = (); # enclosures
269 @report_storage = (); # messages with associated nagios level (storage)
270 @report_chassis = (); # messages with associated nagios level (chassis)
271 @report_other = (); # messages with associated nagios level (other)
273 # Counters for everything
276 'pdisk' => 0, # number of physical disks
277 'vdisk' => 0, # number of logical drives (virtual disks)
278 'temp' => 0, # number of temperature probes
279 'volt' => 0, # number of voltage probes
280 'amp' => 0, # number of amperage probes
281 'intr' => 0, # number of intrusion probes
282 'dimm' => 0, # number of memory modules
283 'fan' => 0, # number of fan probes
284 'cpu' => 0, # number of CPUs
285 'bat' => 0, # number of batteries
286 'power' => 0, # number of power supplies
288 'Critical' => 0, # critical entries in ESM log
289 'Non-Critical' => 0, # warning entries in ESM log
290 'Ok' => 0, # ok entries in ESM log
293 'Critical' => 0, # critical entries in alert log
294 'Non-Critical' => 0, # warning entries in alert log
295 'Ok' => 0, # ok entries in alert log
302 # Global health status
303 $global = 1; # default is to check global status
304 $globalstatus = $E_OK; # default global health status is "OK"
306 # Nagios error levels reversed
310 $E_WARNING => 'WARNING',
311 $E_CRITICAL => 'CRITICAL',
312 $E_UNKNOWN => 'UNKNOWN',
315 # OpenManage (omreport) and SNMP error levels
318 'Unknown' => $E_CRITICAL,
319 'Critical' => $E_CRITICAL,
320 'Non-Critical' => $E_WARNING,
322 'Non-Recoverable' => $E_CRITICAL,
323 'Other' => $E_CRITICAL,
334 6 => 'Non-Recoverable',
337 # Probe Status via SNMP
340 1 => 'Other', # probe status is not one of the following:
341 2 => 'Unknown', # probe status is unknown (not known or monitored)
342 3 => 'Ok', # probe is reporting a value within the thresholds
343 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold
344 5 => 'criticalUpper', # probe has crossed upper critical threshold
345 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold
346 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold
347 8 => 'criticalLower', # probe has crossed lower critical threshold
348 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold
349 10 => 'failed', # probe is not functional
352 # Probe status translated to Nagios alarm levels
355 'Other' => $E_CRITICAL,
356 'Unknown' => $E_CRITICAL,
358 'nonCriticalUpper' => $E_WARNING,
359 'criticalUpper' => $E_CRITICAL,
360 'nonRecoverableUpper' => $E_CRITICAL,
361 'nonCriticalLower' => $E_WARNING,
362 'criticalLower' => $E_CRITICAL,
363 'nonRecoverableLower' => $E_CRITICAL,
364 'failed' => $E_CRITICAL,
367 # System information gathered
370 'bios' => 'N/A', # BIOS version
371 'biosdate' => 'N/A', # BIOS release date
372 'serial' => 'N/A', # serial number (service tag)
373 'model' => 'N/A', # system model
374 'osname' => 'N/A', # OS name
375 'osver' => 'N/A', # OS version
376 'om' => 'N/A', # OMSA version
377 'bmc' => 0, # HAS baseboard management controller (BMC)
378 'rac' => 0, # HAS remote access controller (RAC)
379 'rac_name' => 'N/A', # remote access controller (RAC)
380 'bmc_fw' => 'N/A', # BMC firmware
381 'rac_fw' => 'N/A', # RAC firmware
384 # Adjust which checks to perform
385 adjust_checks() if defined $opt{check};
387 # Blacklisted components
388 %blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
390 # If blacklisting is in effect, don't check global health status
391 if (scalar keys %blacklist > 0) {
395 # Take into account new hardware and blades
396 $omopt_chassis = 'chassis'; # default "chassis" option to omreport
397 $omopt_system = 'system'; # default "system" option to omreport
398 $blade = 0; # if this is a blade system
400 # Some initializations and checking before we begin
402 snmp_initialize(); # initialize SNMP
403 snmp_check(); # check that SNMP works
404 snmp_detect_blade(); # detect blade via SNMP
407 # Find the omreport binary
409 # Check help output from omreport, see which options are available.
410 # Also detecting blade via omreport.
411 check_omreport_options();
415 #---------------------------------------------------------------------
417 #---------------------------------------------------------------------
420 # Store a message in one of the message arrays
423 my ($type, $msg, $exval, $id) = @_;
424 defined $id or $id = q{};
428 'storage' => \@report_storage,
429 'chassis' => \@report_chassis,
430 'other' => \@report_other,
433 return push @{ $type2array{$type} }, [ $msg, $exval, $id ];
438 # Run command, put resulting output lines in an array and return a
439 # pointer to that array
444 open my $CMD, '-|', $command
445 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN)
449 or do { report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN)
450 and return \@lines };
455 # Run command, put resulting output in a string variable and return it
460 open my $CMD, '-|', $command
461 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) and return };
462 my $rawtext = do { local $/ = undef; <$CMD> }; # slurping
465 # NOTE: We don't check the return value of close() since omreport
466 # does something weird sometimes.
# Open the SNMP session used by the SNMP queries elsewhere in this file.
#
# Assembles the arguments for Net::SNMP->session() from %opt (port,
# hostname, protocol version, plus the credentials that belong to that
# version), loads Net::SNMP at run time and stores the outcome in the
# globals $snmp_session and $snmp_error. Takes no arguments.
#
# Every failure path prints a one-line message. NOTE(review): the statement
# following each message is not visible in this excerpt (presumably an exit
# with UNKNOWN status) -- confirm against the full file.
474 sub snmp_initialize {
475 # Legal SNMP v3 protocols
# The alternatives are grouped so that \A and \z anchor every one of them.
# Ungrouped, the anchors applied only to the first and last alternative and
# any value merely containing "aes" or "3des" (or starting with "md5") was
# accepted. "3desede" is listed explicitly because it used to pass through
# that loophole and is the Triple-DES name documented by Net::SNMP.
476 my $snmp_v3_privprotocol = qr{\A (?: des|aes|aes128|3des|3desede|3desde ) \z}xms;
477 my $snmp_v3_authprotocol = qr{\A (?: md5|sha ) \z}xms;
479 # Parameters to Net::SNMP->session()
482 '-port' => $opt{port},
483 '-hostname' => $opt{hostname},
484 '-version' => $opt{protocol},
487 # Parameters for SNMP v3
488 if ($opt{protocol} == 3) {
490 # Username is mandatory
491 if (defined $opt{username}) {
492 $param{'-username'} = $opt{username};
495 print "SNMP ERROR: With SNMPv3 the username must be specified\n";
499 # Authpassword is optional
500 if (defined $opt{authpassword}) {
501 $param{'-authpassword'} = $opt{authpassword};
504 # Authkey is optional
505 if (defined $opt{authkey}) {
506 $param{'-authkey'} = $opt{authkey};
509 # Privpassword is optional
510 if (defined $opt{privpassword}) {
511 $param{'-privpassword'} = $opt{privpassword};
514 # Privkey is optional
515 if (defined $opt{privkey}) {
516 $param{'-privkey'} = $opt{privkey};
519 # Privprotocol is optional
520 if (defined $opt{privprotocol}) {
521 if ($opt{privprotocol} =~ m/$snmp_v3_privprotocol/xms) {
522 $param{'-privprotocol'} = $opt{privprotocol};
525 print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
526 . "must be one of [des|aes|aes128|3des|3desde]\n";
531 # Authprotocol is optional
532 if (defined $opt{authprotocol}) {
533 if ($opt{authprotocol} =~ m/$snmp_v3_authprotocol/xms) {
534 $param{'-authprotocol'} = $opt{authprotocol};
537 print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
538 . "must be one of [md5|sha]\n";
543 # Parameters for SNMP v2c or v1
544 elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
545 $param{'-community'} = $opt{community};
548 print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
552 # Try to initialize the SNMP session
# Net::SNMP is loaded here rather than at compile time, so it is only
# needed when this sub actually runs.
553 if ( eval { require Net::SNMP; 1 } ) {
554 ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
555 if (!defined $snmp_session) {
556 printf "SNMP: %s\n", $snmp_error;
561 print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
568 # Checking if SNMP works by probing for "chassisModelName", which all
569 # servers should have
572 my $chassisModelName = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';
573 my $result = $snmp_session->get_request(-varbindlist => [$chassisModelName]);
575 # Typically if remote host isn't responding
576 if (!defined $result) {
577 printf "SNMP CRITICAL: %s\n", $snmp_session->error;
581 # If OpenManage isn't installed or is not working
582 if ($result->{$chassisModelName} =~ m{\A noSuch (Instance|Object) \z}xms) {
583 print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
590 # Detecting blade via SNMP
592 sub snmp_detect_blade {
593 my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
594 my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);
596 # Identify blade. Older models (4th and 5th gen models) and/or old
597 # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
598 # similar, we assume that this isn't a blade
599 if (exists $result->{$DellBaseBoardType} && $result->{$DellBaseBoardType} eq '3') {
606 # Locate the omreport binary
609 # If user has specified path to omreport
610 if (defined $opt{omreport} and -x $opt{omreport}) {
611 $omreport = qq{"$opt{omreport}"};
615 # Possible full paths for omreport
618 '/usr/bin/omreport', # default on Linux
619 '/opt/dell/srvadmin/bin/omreport', # default on Linux with OMSA 6.2.0
620 '/opt/dell/srvadmin/oma/bin/omreport.sh', # alternate on Linux
621 '/opt/dell/srvadmin/oma/bin/omreport', # alternate on Linux
622 'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x64
623 'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x32
624 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x32
625 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x64
628 # Find the one to use
630 foreach my $bin (@omreport_paths) {
632 $omreport = qq{"$bin"};
637 # Exit with status=UNKNOWN if OM is not installed, or we don't
638 # have permission to execute the binary
639 if (!defined $omreport) {
640 print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
647 # Checks output from 'omreport -?' and searches for arguments to
648 # omreport, to accommodate deprecated options "chassis" and "system"
649 # (on newer hardware), as well as blade servers.
#
# Takes no arguments. Works by side effect on the globals $omopt_system
# and $omopt_chassis, which other code interpolates into omreport command
# lines. The patterns use /x, so the spaces inside them are insignificant:
# each one matches any line that starts with the given word.
651 sub check_omreport_options {
652 foreach (@{ run_command("$omreport -? 2>&1") }) {
653 if (m/\A servermodule /xms) {
654 # If "servermodule" argument to omreport exists, use it
655 # instead of argument "system"
656 $omopt_system = 'servermodule';
658 elsif (m/\A mainsystem /xms) {
659 # If "mainsystem" argument to omreport exists, use it
660 # instead of argument "chassis"
661 $omopt_chassis = 'mainsystem';
663 elsif (m/\A modularenclosure /xms) {
664 # If "modularenclosure" argument to omreport exists, assume
665 # that this is a blade
673 # Read the blacklist option and return a hash containing the
674 # blacklisted components
680 if (scalar @{ $opt{blacklist} } >= 0) {
681 foreach my $black (@{ $opt{blacklist} }) {
684 open my $BL, '<', $black
685 or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN)
698 return {} if $#bl < 0;
700 # Parse blacklist string, put in hash
701 foreach my $black (@bl) {
702 my @comps = split m{/}xms, $black;
703 foreach my $c (@comps) {
704 next if $c !~ m/=/xms;
705 my ($key, $val) = split /=/xms, $c;
706 my @vals = split /,/xms, $val;
707 $blacklist{$key} = \@vals;
715 # Read the check option and adjust the hash %check, which is a rough
716 # list of components to be checked
#
# Reads $opt{all}-style switches from %opt ("--all", "--only", "--check")
# and rewrites the global %check accordingly; conflicting combinations
# print an ERROR line. NOTE(review): the statement after each ERROR line is
# not visible in this excerpt (presumably an exit) -- confirm.
721 # Adjust checking based on the '--all' option
# Only the exact words "critical" and "warning" select an alert type rather
# than a component. The alternation is grouped so that \A and \z apply to
# both words; ungrouped, any value starting with "critical" or ending in
# "warning" was treated as an alert type.
724 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical|warning ) \z}xms) {
725 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
728 if (scalar @{ $opt{check} } > 0) {
729 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
733 # set the check hash to check everything
734 map { $_ = 1 } values %check;
739 # Adjust checking based on the '--only' option
# Same grouping as above: a misspelled value such as "criticalfoo" now
# reaches the keyword validation below instead of bypassing it.
740 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical|warning ) \z}xms) {
742 if (scalar @{ $opt{check} } > 0) {
743 print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
746 if (! exists $check{$opt{only}} && $opt{only} ne 'chassis') {
747 print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
751 # reset the check hash
752 map { $_ = 0 } values %check;
754 # adjust the check hash
755 if ($opt{only} eq 'chassis') {
756 map { $check{$_} = 1 } qw(memory fans power temp cpu voltage
757 batteries amperage intrusion esmhealth);
760 $check{$opt{only}} = 1;
766 # Adjust checking based on the '--check' option
767 if (scalar @{ $opt{check} } >= 0) { # NOTE(review): always true; an empty list just skips the loop
768 foreach my $check (@{ $opt{check} }) {
771 open my $CL, '<', $check
772 or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN) and return };
785 # Parse checklist string, put in hash
786 foreach my $check (@cl) {
787 my @checks = split /,/xms, $check;
788 foreach my $c (@checks) {
789 next if $c !~ m/=/xms;
790 my ($key, $val) = split /=/xms, $c;
795 # Check if we should check global health status
797 foreach (keys %check) {
798 next CHECK_KEY if $_ eq 'esmlog'; # not part of global status
799 next CHECK_KEY if $_ eq 'alertlog'; # not part of global status
801 if ($check{$_} == 0) { # found something with checking turned off
811 # Runs omreport and returns an array of anonymous hashes containing
813 # Takes one argument: string containing parameters to omreport
820 # Errors that are OK. Some low-end poweredge (and blades) models
821 # don't have RAID controllers, intrusion detection sensor, or
822 # redundant/instrumented power supplies etc.
825 Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem # No intrusion probe
826 | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
827 | No\scontrollers\sfound # No RAID controller
828 | No\sbattery\sprobes\sfound\son\sthis\ssystem # No battery probes
829 | Invalid\scommand:\spwrmonitoring # Older OMSAs lack this command(?)
830 # | Current\sprobes\snot\sfound # No power monitoring capability
833 # Errors that are OK on blade servers
836 No\sfan\sprobes\sfound\son\sthis\ssystem # No fan probes
839 # Run omreport and fetch output
840 my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
841 return [] if !defined $rawtext;
843 # Workaround for Openmanage BUG introduced in OMSA 5.5.0
844 $rawtext =~ s{\n;}{;}gxms if $command eq 'storage controller';
846 # Openmanage sometimes puts a linebreak between "Error" and the
848 $rawtext =~ s{^Error\s*\n}{Error: }xms;
850 # Parse output, store in array
851 for ((split m{\n}xms, $rawtext)) {
853 next if m{$ok_errors}xms;
854 next if ($blade and m{$ok_blade_errors}xms);
855 report('other', "Problem running 'omreport $command': $_", $E_UNKNOWN);
858 next if !m/(.*?;){2}/xms; # ignore lines with less than 3 fields
859 my @vals = split /;/xms;
860 if ($vals[0] =~ m/\A (Index|ID|Severity|Processor|Current\sSpeed) \z/xms) {
865 push @output, { map { $_ => $vals[$i++] } @keys };
870 # Finally, return the collected information
876 # Checks if a component is blacklisted. Returns 1 if the component is
877 # blacklisted, 0 otherwise. Takes two arguments:
878 # arg1: component name
879 # arg2: component id or index
882 my $name = shift; # component name
883 my $id = shift; # component id
884 my $ret = 0; # return value
886 if (defined $blacklist{$name}) {
887 foreach my $comp (@{ $blacklist{$name} }) {
888 if (defined $id and ($comp eq $id or uc($comp) eq 'ALL')) {
897 # Converts the NexusID from SNMP to our version
900 $nexus =~ s{\A \\}{}xms;
901 $nexus =~ s{\\}{:}gxms;
905 # Sets custom temperature thresholds based on user supplied options
#
# Takes one argument, 'w' or 'c', and reads the matching option list
# ($opt{warning} or $opt{critical}). Each entry is a comma-separated list
# of "key=max" or "key=max/min" items, which are collected into
# %thres as $thres{key}{max} and $thres{key}{min}. Returns an empty hash
# reference if a threshold file cannot be opened.
# NOTE(review): what the keys identify and the final return statement are
# not visible in this excerpt -- confirm against the full file.
906 sub custom_temperature_thresholds {
907 my $type = shift; # type of threshold, either w (warning) or c (critical)
908 my %thres = (); # will contain the thresholds
909 my @limits = (); # holds the input
911 my @opt = $type eq 'w' ? @{ $opt{warning} } : @{ $opt{critical} };
913 if (scalar @opt >= 0) { # NOTE(review): always true; an empty list just skips the loop
914 foreach my $t (@opt) {
918 or do { report('other', "Couldn't open temperature threshold file $t: $!",
919 $E_UNKNOWN) and return {} };
930 # Parse threshold strings, put in hash
931 foreach my $th (@limits) {
932 my @tmp = split m{,}xms, $th;
933 foreach my $t (@tmp) {
934 next if $t !~ m{=}xms; # silently skip items without a "="
935 my ($key, $val) = split m{=}xms, $t;
936 if ($val =~ m{/}xms) {
937 my ($max, $min) = split m{/}xms, $val; # value before the slash is max, after it is min
938 $thres{$key}{max} = $max;
939 $thres{$key}{min} = $min;
942 $thres{$key}{max} = $val;
951 # Gets the output from SNMP result according to the OIDs checked
952 sub get_snmp_output {
953 my ($result,$oidref) = @_;
957 foreach my $oid (keys %{ $result }) {
959 $short =~ s{\s}{}gxms; # remove whitespace
960 $short =~ s{\A (.+) \. (\d+) \z}{$1}xms; # remove last number
962 if (exists $oidref->{$short}) {
963 $temp[$id]{$oidref->{$short}} = $result->{$oid};
967 # Remove any empty indexes
968 foreach my $out (@temp) {
978 # Map the controller or other item in-place
980 my ($key, $val, $list) = @_;
982 foreach my $lst (@{ $list }) {
983 if (!exists $lst->{$key}) {
990 # Return the URL for official Dell documentation for a specific
992 sub documentation_url {
995 # create model short form, e.g. "r710"
996 $model =~ s{\A PowerEdge \s (.+?) \z}{lc($1)}exms;
998 # special case for blades (e.g. M600, M710), they have common
1000 $model =~ s{\A m\d+ \z}{m}xms;
1002 return 'http://support.dell.com/support/edocs/systems/pe' . $model . '/';
1005 # Return the URL for warranty information for a server with a given
1006 # serial number (servicetag)
1010 # Dell support sites for different parts of the world
1013 'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
1014 'ap' => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
1015 'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
1018 # warranty URLs for different country codes
1022 'at' => $supportsite{emea} . 'de/details?c=at&l=de&ServiceTag=', # Austria
1023 'be' => $supportsite{emea} . 'nl/details?c=be&l=nl&ServiceTag=', # Belgium
1024 'cz' => $supportsite{emea} . 'cs/details?c=cz&l=cs&ServiceTag=', # Czech Republic
1025 'de' => $supportsite{emea} . 'de/details?c=de&l=de&ServiceTag=', # Germany
1026 'dk' => $supportsite{emea} . 'da/details?c=dk&l=da&ServiceTag=', # Denmark
1027 'es' => $supportsite{emea} . 'es/details?c=es&l=es&ServiceTag=', # Spain
1028 'fi' => $supportsite{emea} . 'fi/details?c=fi&l=fi&ServiceTag=', # Finland
1029 'fr' => $supportsite{emea} . 'fr/details?c=fr&l=fr&ServiceTag=', # France
1030 'gr' => $supportsite{emea} . 'en/details?c=gr&l=el&ServiceTag=', # Greece
1031 'it' => $supportsite{emea} . 'it/details?c=it&l=it&ServiceTag=', # Italy
1032 'il' => $supportsite{emea} . 'en/details?c=il&l=en&ServiceTag=', # Israel
1033 'me' => $supportsite{emea} . 'en/details?c=me&l=en&ServiceTag=', # Middle East
1034 'no' => $supportsite{emea} . 'no/details?c=no&l=no&ServiceTag=', # Norway
1035 'nl' => $supportsite{emea} . 'nl/details?c=nl&l=nl&ServiceTag=', # The Netherlands
1036 'pl' => $supportsite{emea} . 'pl/details?c=pl&l=pl&ServiceTag=', # Poland
1037 'pt' => $supportsite{emea} . 'en/details?c=pt&l=pt&ServiceTag=', # Portugal
1038 'ru' => $supportsite{emea} . 'ru/details?c=ru&l=ru&ServiceTag=', # Russia
1039 'se' => $supportsite{emea} . 'sv/details?c=se&l=sv&ServiceTag=', # Sweden
1040 'uk' => $supportsite{emea} . 'en/details?c=uk&l=en&ServiceTag=', # United Kingdom
1041 'za' => $supportsite{emea} . 'en/details?c=za&l=en&ServiceTag=', # South Africa
1043 'br' => $supportsite{glob} . 'c=br&l=pt&ServiceTag=', # Brazil
1044 'ca' => $supportsite{glob} . 'c=ca&l=en&ServiceTag=', # Canada
1045 'mx' => $supportsite{glob} . 'c=mx&l=es&ServiceTag=', # Mexico
1046 'us' => $supportsite{glob} . 'c=us&l=en&ServiceTag=', # USA
1048 'au' => $supportsite{ap} . 'c=au&l=en&ServiceTag=', # Australia
1049 'cn' => $supportsite{ap} . 'c=cn&l=zh&ServiceTag=', # China
1050 'in' => $supportsite{ap} . 'c=in&l=en&ServiceTag=', # India
1052 'XX' => $supportsite{glob} . 'ServiceTag=', # default
1055 if (exists $url{$opt{htmlinfo}}) {
1056 return $url{$opt{htmlinfo}} . $tag;
1059 return $url{XX} . $tag;
1065 #---------------------------------------------------------------------
1067 #---------------------------------------------------------------------
1069 #-----------------------------------------
1070 # Check global health status
1071 #-----------------------------------------
# Determine the overall health and store it, translated to a Nagios level,
# in $health. Two sources are visible here: an SNMP query for the global
# system status, and the "Main System" row of the omreport system report.
# NOTE(review): the conditions choosing between them and the use of $health
# afterwards are not visible in this excerpt.
1077 # Checks global status, i.e. both storage and chassis
1079 my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
1080 my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
1081 if (!defined $result) {
# Ask the session object why the request failed. In the code visible here
# $snmp_error is only assigned when the session is created, so it carries
# no information about a failed get_request().
1082 printf "SNMP ERROR [global]: %s\n", $snmp_session->error;
# Numeric SNMP status -> status name -> Nagios level
1085 $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
1089 # NB! This does not check storage, only chassis...
1091 foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
1093 next if m/\A SEVERITY;COMPONENT/xms; # skip the header row
1094 if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
1095 $health = $status2nagios{$1}; # $1 is the severity field of that row
1106 #-----------------------------------------
1107 # STORAGE: Check controllers
1108 #-----------------------------------------
1109 sub check_controllers {
1110 return if blacklisted('ctrl', 'all');
1119 my $firmware = undef;
1121 my $minstdr = undef; # Minimum required Storport driver version (whats this?)
1122 my $stdr = undef; # Storport driver version (whats this?)
1128 '1.3.6.1.4.1.674.10893.1.20.130.1.1.1' => 'controllerNumber',
1129 '1.3.6.1.4.1.674.10893.1.20.130.1.1.2' => 'controllerName',
1130 '1.3.6.1.4.1.674.10893.1.20.130.1.1.5' => 'controllerState',
1131 '1.3.6.1.4.1.674.10893.1.20.130.1.1.8' => 'controllerFWVersion',
1132 '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
1133 '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
1134 '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
1135 '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
1136 '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
1137 '1.3.6.1.4.1.674.10893.1.20.130.1.1.55' => 'FIXME_StorportDriverVersion',
1138 '1.3.6.1.4.1.674.10893.1.20.130.1.1.56' => 'FIXME_StorportMinDriverVersion',
1141 # We use get_table() here for the odd case where a server has
1142 # two or more controllers, and where some OIDs are missing on
1143 # one of the controllers.
1144 my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
1145 my $result = $snmp_session->get_table(-baseoid => $controllerTable);
1147 # No controllers is OK
1148 return if !defined $result;
1150 @output = @{ get_snmp_output($result, \%ctrl_oid) };
1153 @output = @{ run_omreport('storage controller') };
1167 foreach my $out (@output) {
1169 $id = $out->{controllerNumber} - 1;
1170 $name = $out->{controllerName};
1171 $state = $ctrl_state{$out->{controllerState}};
1172 $status = $snmp_status{$out->{controllerComponentStatus}};
1173 $minfw = exists $out->{controllerMinFWVersion}
1174 ? $out->{controllerMinFWVersion} : undef;
1175 $mindr = exists $out->{controllerMinDriverVersion}
1176 ? $out->{controllerMinDriverVersion} : undef;
1177 $firmware = exists $out->{controllerFWVersion}
1178 ? $out->{controllerFWVersion} : 'N/A';
1179 $driver = exists $out->{controllerDriverVersion}
1180 ? $out->{controllerDriverVersion} : 'N/A';
1181 $minstdr = exists $out->{'FIXME_StorportMinDriverVersion'}
1182 ? $out->{FIXME_StorportMinDriverVersion} : undef;
1183 $stdr = exists $out->{FIXME_StorportDriverVersion}
1184 ? $out->{FIXME_StorportDriverVersion} : undef;
1185 $nexus = convert_nexus($out->{controllerNexusID});
1189 $name = $out->{Name};
1190 $state = $out->{State};
1191 $status = $out->{Status};
1192 $minfw = $out->{'Minimum Required Firmware Version'} ne 'Not Applicable'
1193 ? $out->{'Minimum Required Firmware Version'} : undef;
1194 $mindr = $out->{'Minimum Required Driver Version'} ne 'Not Applicable'
1195 ? $out->{'Minimum Required Driver Version'} : undef;
1196 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
1197 ? $out->{'Firmware Version'} : 'N/A';
1198 $driver = $out->{'Driver Version'} ne 'Not Applicable'
1199 ? $out->{'Driver Version'} : 'N/A';
1200 $minstdr = (exists $out->{'Minimum Required Storport Driver Version'}
1201 and $out->{'Minimum Required Storport Driver Version'} ne 'Not Applicable')
1202 ? $out->{'Minimum Required Storport Driver Version'} : undef;
1203 $stdr = (exists $out->{'Storport Driver Version'}
1204 and $out->{'Storport Driver Version'} ne 'Not Applicable')
1205 ? $out->{'Storport Driver Version'} : undef;
1209 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
1210 push @controllers, $id;
1212 # Collecting some storage info
1213 $sysinfo{'controller'}{$id}{'id'} = $nexus;
1214 $sysinfo{'controller'}{$id}{'name'} = $name;
1215 $sysinfo{'controller'}{$id}{'driver'} = $driver;
1216 $sysinfo{'controller'}{$id}{'firmware'} = $firmware;
1217 $sysinfo{'controller'}{$id}{'storport'} = $stdr;
1219 next CTRL if blacklisted('ctrl', $nexus);
1221 # Special case: old firmware
1222 if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
1224 my $msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date},
1225 $id, $name, $firmware;
1226 report('storage', $msg, $E_WARNING, $nexus);
1228 # Special case: old driver
1229 if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
1231 my $msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date},
1232 $id, $name, $driver;
1233 report('storage', $msg, $E_WARNING, $nexus);
1235 # Special case: old storport driver
1236 if (!blacklisted('ctrl_stdr', $id) && defined $minstdr) {
1238 my $msg = sprintf q{Controller %d [%s]: Storport driver '%s' is out of date},
1240 report('storage', $msg, $E_WARNING, $nexus);
1243 if ($status eq 'Ok' or ($status eq 'Non-Critical'
1244 and (defined $minfw or defined $mindr or defined $minstdr))) {
1245 my $msg = sprintf 'Controller %d [%s] is %s',
1247 report('storage', $msg, $E_OK, $nexus);
1251 my $msg = sprintf 'Controller %d [%s] needs attention: %s',
1253 report('storage', $msg, $status2nagios{$status}, $nexus);
1260 #-----------------------------------------
1261 # STORAGE: Check physical drives
1262 #-----------------------------------------
# Checks every physical disk behind the known storage controllers and
# emits one report('storage', ...) line per disk.
#
# Disk data is taken either from the SNMP arrayDisk* columns fetched with
# $snmp_session->get_entries(), or from "omreport storage pdisk
# controller=N" run once for each entry in @controllers.  Nothing is done
# when @controllers is empty or when 'pdisk' is blacklisted for 'all'.
1263 sub check_physical_disks {
1264 return if $#controllers == -1;
1265 return if blacklisted('pdisk', 'all');
1275 my $vendor = undef; # disk vendor
1276 my $product = undef; # product ID
1277 my $capacity = undef; # disk length (size) in bytes
# SNMP column OIDs mapped to the key names used when reading $out below
1283 '1.3.6.1.4.1.674.10893.1.20.130.4.1.1' => 'arrayDiskNumber',
1284 '1.3.6.1.4.1.674.10893.1.20.130.4.1.2' => 'arrayDiskName',
1285 '1.3.6.1.4.1.674.10893.1.20.130.4.1.3' => 'arrayDiskVendor',
1286 '1.3.6.1.4.1.674.10893.1.20.130.4.1.4' => 'arrayDiskState',
1287 '1.3.6.1.4.1.674.10893.1.20.130.4.1.6' => 'arrayDiskProductID',
1288 '1.3.6.1.4.1.674.10893.1.20.130.4.1.9' => 'arrayDiskEnclosureID',
1289 '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
1290 '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
1291 '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
1292 '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
1293 '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
1294 '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
1295 '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
1296 '1.3.6.1.4.1.674.10893.1.20.130.5.1.7' => 'arrayDiskEnclosureConnectionControllerNumber',
1297 '1.3.6.1.4.1.674.10893.1.20.130.6.1.7' => 'arrayDiskChannelConnectionControllerNumber',
1299 my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);
# For physical disks a failed SNMP query is an error (printed below),
# not an "absent component" case
1301 if (!defined $result) {
1302 printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
1303 $snmp_session->close;
1307 @output = @{ get_snmp_output($result, \%pdisk_oid) };
# omreport path: collect the disks of every controller and tag the
# collected entries with the controller id under the 'ctrl' key
# (presumably map_item only fills entries that lack the key -- TODO confirm)
1310 foreach my $c (@controllers) {
1311 push @output, @{ run_omreport("storage pdisk controller=$c") };
1312 map_item('ctrl', $c, \@output);
# Numeric disk state codes and their readable names; looked up below
# through $pdisk_state{...}
1330 28 => 'Diagnostics',
1331 34 => 'Predictive failure',
1332 35 => 'Initializing',
1335 41 => 'Unsupported',
1336 53 => 'Incompatible',
1339 # Check physical disks on each of the controllers
1341 foreach my $out (@output) {
1343 $name = $out->{arrayDiskName};
# Disk id is channel:enclosure:target when an enclosure id is present,
# otherwise channel:target
1344 if (exists $out->{arrayDiskEnclosureID}) {
1345 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID},
1346 $out->{arrayDiskTargetID});
1349 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID});
1351 $state = $pdisk_state{$out->{arrayDiskState}};
1352 $status = $snmp_status{$out->{arrayDiskComponentStatus}};
# A SMART alert indication of 2 is treated as "failure predicted"
1353 $fpred = $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
1355 $nexus = convert_nexus($out->{arrayDiskNexusID});
1356 $vendor = $out->{arrayDiskVendor};
1357 $product = $out->{arrayDiskProductID};
# Convert the length reported in MB to bytes
1358 $capacity = $out->{arrayDiskLengthInMB} * 1024**2;
# Controller number comes from whichever connection column is present;
# the "- 1" presumably converts a 1-based SNMP number to the 0-based
# controller id used elsewhere -- TODO confirm
1359 if (exists $out->{arrayDiskEnclosureConnectionControllerNumber}) {
1360 $ctrl = $out->{arrayDiskEnclosureConnectionControllerNumber} - 1;
1362 elsif (exists $out->{arrayDiskChannelConnectionControllerNumber}) {
1363 $ctrl = $out->{arrayDiskChannelConnectionControllerNumber} - 1;
1371 $name = $out->{'Name'};
1372 $state = $out->{'State'};
1373 $status = $out->{'Status'};
1374 $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
1375 $progr = ' [' . $out->{'Progress'} . ']';
1376 $ctrl = $out->{'ctrl'};
1377 $nexus = join q{:}, $out->{ctrl}, $id;
1378 $vendor = $out->{'Vendor ID'};
1379 $product = $out->{'Product ID'};
1380 $capacity = $out->{'Capacity'};
# Keep only the byte count from a capacity string ending in "(N bytes)"
1381 $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
1384 next PDISK if blacklisted('pdisk', $nexus);
1387 $vendor =~ s{\s+\z}{}xms; # remove trailing whitespace
1388 $product =~ s{\s+\z}{}xms; # remove trailing whitespace
1390 # Calculate human readable capacity
# Decimal units are used: TB with one decimal once the size rounds up to
# 1000 GB or more, otherwise whole GB
1391 $capacity = ceil($capacity / 1000**3) >= 1000
1392 ? sprintf '%.1fTB', ($capacity / 1000**4)
1393 : sprintf '%.0fGB', ($capacity / 1000**3);
1394 $capacity = '450GB' if $capacity eq '449GB'; # quick fix for 450GB disks
1395 $capacity = '300GB' if $capacity eq '299GB'; # quick fix for 300GB disks
1396 $capacity = '146GB' if $capacity eq '147GB'; # quick fix for 146GB disks
1398 # Capitalize only the first letter of the vendor name
# NOTE(review): the first character is kept as-is (not upper-cased);
# only the remainder is lower-cased
1399 $vendor = (substr $vendor, 0, 1) . lc (substr $vendor, 1, length $vendor);
1401 # Remove unnecessary trademark rubbish from vendor name
1402 $vendor =~ s{\(tm\)\z}{}xms;
1404 # Special case: Failure predicted
1405 if ($status eq 'Non-Critical' and $fpred) {
1406 my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
1407 $name, $vendor, $product, $capacity, $ctrl;
1408 report('storage', $msg, $E_WARNING, $nexus);
1410 # Special case: Rebuilding
1411 elsif ($state eq 'Rebuilding') {
1412 my $msg = sprintf '%s [%s] on ctrl %d is %s%s',
1413 $name, $capacity, $ctrl, $state, $progr;
1414 report('storage', $msg, $E_WARNING, $nexus);
# Any other non-Ok status is reported with the severity mapped from it
1417 elsif ($status ne 'Ok') {
1418 my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
1419 $name, $vendor, $product, $capacity, $ctrl, $state;
1420 report('storage', $msg, $status2nagios{$status}, $nexus);
1424 my $msg = sprintf '%s [%s] on ctrl %d is %s',
1425 $name, $capacity, $ctrl, $state;
1426 report('storage', $msg, $E_OK, $nexus);
1433 #-----------------------------------------
1434 # STORAGE: Check logical drives
1435 #-----------------------------------------
# Checks every logical (virtual) drive and emits one
# report('storage', ...) line per drive.
#
# Data is taken either from the SNMP virtualDisk* columns or from
# "omreport storage vdisk controller=N" for each entry in @controllers.
# Nothing is done when @controllers is empty, when 'vdisk' is blacklisted
# for 'all', or when the SNMP query returns no result.
1436 sub check_virtual_disks {
1437 return if $#controllers == -1;
1438 return if blacklisted('vdisk', 'all');
# SNMP column OIDs mapped to the key names used when reading $out below
1455 '1.3.6.1.4.1.674.10893.1.20.140.1.1.3' => 'virtualDiskDeviceName',
1456 '1.3.6.1.4.1.674.10893.1.20.140.1.1.4' => 'virtualDiskState',
1457 '1.3.6.1.4.1.674.10893.1.20.140.1.1.6' => 'virtualDiskLengthInMB',
1458 '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
1459 '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
1460 '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
1461 '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
1463 my $result = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);
1465 # No logical drives is OK
1466 return if !defined $result;
1468 @output = @{ get_snmp_output($result, \%vdisk_oid) };
# omreport path: collect the logical drives of every controller and tag
# the collected entries with the controller id under the 'ctrl' key
1471 foreach my $c (@controllers) {
1472 push @output, @{ run_omreport("storage vdisk controller=$c") };
1473 map_item('ctrl', $c, \@output);
# Numeric state codes and their readable names; looked up below through
# $vdisk_state{...}
1486 16 => 'Regenerating',
1489 32 => 'Reconstructing',
1490 35 => 'Initializing',
1491 36 => 'Background Initialization',
1492 38 => 'Resynching Paused',
1493 52 => 'Permanently Degraded',
1494 54 => 'Degraded Redundancy',
# Numeric layout codes and their readable names; looked up below through
# $vdisk_layout{...}
1499 1 => 'Concatenated',
1506 19 => 'Concatenated RAID 1',
1510 # Check virtual disks on each of the controllers
1512 foreach my $out (@output) {
1514 $id = $out->{virtualDiskTargetID};
1515 $dev = $out->{virtualDiskDeviceName};
1516 $state = $vdisk_state{$out->{virtualDiskState}};
1517 $status = $snmp_status{$out->{virtualDiskComponentStatus}};
1518 $layout = $vdisk_layout{$out->{virtualDiskLayout}};
# Length in MB divided by 1024, shown with two decimals and a "GB" suffix
1519 $size = sprintf '%.2f GB', $out->{virtualDiskLengthInMB} / 1024;
1520 $progr = q{}; # can't get this from SNMP(?)
1521 $nexus = convert_nexus($out->{virtualDiskNexusID});
1522 $ctrl = $nexus; # We use the nexus id to get the controller id
# Only rewrites a nexus of the exact form "N:M"; any other shape is left
# unchanged
1523 $ctrl =~ s{\A (\d+):\d+ \z}{$1}xms;
1527 $dev = $out->{'Device Name'};
1528 $state = $out->{State};
1529 $status = $out->{Status};
1530 $layout = $out->{Layout};
1531 $size = $out->{Size};
1532 $progr = ' [' . $out->{Progress} . ']';
# Cut the size string after the last "GB" it contains
1533 $size =~ s{\A (.*GB).* \z}{$1}xms;
1534 $nexus = join q{:}, $out->{ctrl}, $id;
1535 $ctrl = $out->{ctrl};
1538 next VDISK if blacklisted('vdisk', $nexus);
1541 # The device name is undefined sometimes
1542 $dev = q{} if !defined $dev;
1544 # Special case: Regenerating
1545 if ($state eq 'Regenerating') {
1546 my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s},
1547 $id, $dev, $layout, $size, $ctrl, $state, $progr;
1548 report('storage', $msg, $E_WARNING, $nexus);
# Any other non-Ok status is reported with the severity mapped from it
1551 elsif ($status ne 'Ok') {
1552 my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s},
1553 $id, $dev, $layout, $size, $ctrl, $state;
1554 report('storage', $msg, $status2nagios{$status}, $nexus);
1558 my $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s},
1559 $id, $dev, $layout, $size, $ctrl, $state;
1560 report('storage', $msg, $E_OK, $nexus);
1567 #-----------------------------------------
1568 # STORAGE: Check cache batteries
1569 #-----------------------------------------
# Checks the cache battery of each storage controller and emits one
# report('storage', ...) line per battery.
#
# Data is taken either from the SNMP battery* columns or from
# "omreport storage battery controller=N" for each entry in @controllers.
# Several battery states that are described below as "probably harmless"
# are reported as warnings and can be silenced with the 'bat_charge'
# blacklist keyword.  Nothing is done when @controllers is empty, when
# 'bat' is blacklisted for 'all', or when the SNMP query returns no result.
1570 sub check_cache_battery {
1571 return if $#controllers == -1;
1572 return if blacklisted('bat', 'all');
1579 my $learn = undef; # learn state
1580 my $pred = undef; # battery's ability to be charged
# SNMP column OIDs mapped to the key names used when reading $out below
1586 '1.3.6.1.4.1.674.10893.1.20.130.15.1.4' => 'batteryState',
1587 '1.3.6.1.4.1.674.10893.1.20.130.15.1.6' => 'batteryComponentStatus',
1588 '1.3.6.1.4.1.674.10893.1.20.130.15.1.9' => 'batteryNexusID',
1589 '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
1590 '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
1591 '1.3.6.1.4.1.674.10893.1.20.130.16.1.5' => 'batteryConnectionControllerNumber',
1593 my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
1595 # No cache battery is OK
1596 return if !defined $result;
1598 @output = @{ get_snmp_output($result, \%bat_oid) };
# omreport path: collect the batteries of every controller and tag the
# collected entries with the controller id under the 'ctrl' key
1601 foreach my $c (@controllers) {
1602 push @output, @{ run_omreport("storage battery controller=$c") };
1603 map_item('ctrl', $c, \@output);
# Numeric battery state code and its readable name
1613 7 => 'Reconditioning',
1621 # Specifies the learn state activity of the battery
1631 # This property displays the battery's ability to be charged
1634 1 => 'Failed', # The battery cannot be charged and needs to be replaced
1635 2 => 'Ready', # The battery can be charged to full capacity
1636 4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
1637 # battery cannot be determined until the Learn cycle is complete
1640 # Check battery on each of the controllers
1642 foreach my $out (@output) {
1644 $state = $bat_state{$out->{batteryState}};
1645 $status = $snmp_status{$out->{batteryComponentStatus}};
# The learn state and predicted capacity columns are optional; both
# variables stay undef when the column is absent.
# NOTE(review): the 'Charging' and 'Learning' branches below compare these
# with eq and print them with %s, which operates on undef in that case
1646 $learn = exists $out->{batteryLearnState}
1647 ? $bat_learn_state{$out->{batteryLearnState}} : undef;
1648 $pred = exists $out->{batteryPredictedCapacity}
1649 ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
1650 $ctrl = $out->{batteryConnectionControllerNumber} - 1;
1651 $nexus = convert_nexus($out->{batteryNexusID});
# For an id of the exact form "N:M", keep only the part after the colon
1653 $id =~ s{\A \d+:(\d+) \z}{$1}xms;
1657 $state = $out->{'State'};
1658 $status = $out->{'Status'};
1659 $learn = $out->{'Learn State'};
1660 $pred = $out->{'Predicted Capacity Status'};
1661 $ctrl = $out->{'ctrl'};
1662 $nexus = join q{:}, $out->{ctrl}, $id;
1665 next BATTERY if blacklisted('bat', $nexus);
1667 # Special case: Charging
1668 if ($state eq 'Charging') {
# Charging with a failed predicted capacity is critical and is not
# affected by the 'bat_charge' blacklist
1669 if ($pred eq 'Failed') {
1670 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [replace battery]',
1671 $id, $ctrl, $state, $pred;
1672 report('storage', $msg, $E_CRITICAL, $nexus);
1675 next BATTERY if blacklisted('bat_charge', $nexus);
1676 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
1677 $id, $ctrl, $state, $pred;
1678 report('storage', $msg, $E_WARNING, $nexus);
1681 # Special case: Learning (battery learns its capacity)
1682 elsif ($state eq 'Learning') {
# A failed learn cycle is critical and is not affected by the
# 'bat_charge' blacklist
1683 if ($learn eq 'Failed') {
1684 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s)',
1685 $id, $ctrl, $state, $learn;
1686 report('storage', $msg, $E_CRITICAL, $nexus);
1689 next BATTERY if blacklisted('bat_charge', $nexus);
1690 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
1691 $id, $ctrl, $state, $learn;
1692 report('storage', $msg, $E_WARNING, $nexus);
1695 # Special case: Power Low (first part of recharge cycle)
1696 elsif ($state eq 'Power Low') {
1697 next BATTERY if blacklisted('bat_charge', $nexus);
1698 my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
1700 report('storage', $msg, $E_WARNING, $nexus);
1702 # Special case: Degraded and Non-Critical (usually part of recharge cycle)
1703 elsif ($state eq 'Degraded' && $status eq 'Non-Critical') {
1704 next BATTERY if blacklisted('bat_charge', $nexus);
1705 my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
1706 $id, $ctrl, $state, $status;
1707 report('storage', $msg, $E_WARNING, $nexus);
# Any other non-Ok status is reported with the severity mapped from it
1710 elsif ($status ne 'Ok') {
1711 my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
1712 $id, $ctrl, $state, $status;
1713 report('storage', $msg, $status2nagios{$status}, $nexus);
1717 my $msg = sprintf 'Cache battery %d in controller %d is %s',
1719 report('storage', $msg, $E_OK, $nexus);
1726 #-----------------------------------------
1727 # STORAGE: Check connectors (channels)
1728 #-----------------------------------------
# Checks the connectors (channels) of each storage controller and emits
# one report('storage', ...) line per connector, using the severity
# mapped from the connector's status.
#
# Data is taken either from the SNMP channel* columns or from
# "omreport storage connector controller=N" for each entry in
# @controllers.  Nothing is done when @controllers is empty or when
# 'conn' is blacklisted for 'all'.
1729 sub check_connectors {
1730 return if $#controllers == -1;
1731 return if blacklisted('conn', 'all');
# SNMP column OIDs mapped to the key names used when reading $out below
1745 '1.3.6.1.4.1.674.10893.1.20.130.2.1.1' => 'channelNumber',
1746 '1.3.6.1.4.1.674.10893.1.20.130.2.1.2' => 'channelName',
1747 '1.3.6.1.4.1.674.10893.1.20.130.2.1.3' => 'channelState',
1748 '1.3.6.1.4.1.674.10893.1.20.130.2.1.8' => 'channelComponentStatus',
1749 '1.3.6.1.4.1.674.10893.1.20.130.2.1.9' => 'channelNexusID',
1750 '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
1752 my $result = $snmp_session->get_entries(-columns => [keys %conn_oid]);
# For connectors a failed SNMP query is an error (printed below), not an
# "absent component" case
1754 if (!defined $result) {
1755 printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
1756 $snmp_session->close;
1760 @output = @{ get_snmp_output($result, \%conn_oid) };
# omreport path: collect the connectors of every controller and tag the
# collected entries with the controller id under the 'ctrl' key
1763 foreach my $c (@controllers) {
1764 push @output, @{ run_omreport("storage connector controller=$c") };
1765 map_item('ctrl', $c, \@output);
# Numeric bus type code and its readable name
1783 3 => 'Fibre Channel',
1790 # Check connectors on each of the controllers
1792 foreach my $out (@output) {
1794 $id = $out->{channelNumber} - 1;
1795 $name = $out->{channelName};
1796 $state = $conn_state{$out->{channelState}};
1797 $status = $snmp_status{$out->{channelComponentStatus}};
1798 $type = $conn_bustype{$out->{channelBusType}};
1799 $nexus = convert_nexus($out->{channelNexusID});
# Replace the first "N:M" pair in $ctrl with its first number
1801 $ctrl =~ s{(\d+):\d+}{$1}xms;
1805 $name = $out->{'Name'};
1806 $state = $out->{'State'};
1807 $status = $out->{'Status'};
1808 $type = $out->{'Connector Type'};
1809 $ctrl = $out->{ctrl};
1810 $nexus = join q{:}, $out->{ctrl}, $id;
1813 next CHANNEL if blacklisted('conn', $nexus);
# The same message is used for every status; only the severity differs
1815 my $msg = sprintf '%s [%s] on controller %d is %s',
1816 $name, $type, $ctrl, $state;
1817 report('storage', $msg, $status2nagios{$status}, $nexus);
1823 #-----------------------------------------
1824 # STORAGE: Check enclosures
1825 #-----------------------------------------
# Checks every storage enclosure and emits one report('storage', ...)
# line per enclosure, using the severity mapped from its status.
#
# Besides reporting, each enclosure is appended to @enclosures and its
# nexus id, name and firmware version are stored in
# $sysinfo{'enclosure'}; this happens before the per-enclosure blacklist
# test, so blacklisted enclosures are still recorded.
#
# NOTE(review): unlike the other storage checks here, there is no
# "return if $#controllers == -1" guard -- confirm whether that is
# intentional.
1826 sub check_enclosures {
1827 return if blacklisted('encl', 'all');
1834 my $firmware = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
1841 '1.3.6.1.4.1.674.10893.1.20.130.3.1.1' => 'enclosureNumber',
1842 '1.3.6.1.4.1.674.10893.1.20.130.3.1.2' => 'enclosureName',
1843 '1.3.6.1.4.1.674.10893.1.20.130.3.1.4' => 'enclosureState',
1844 '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
1845 '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
1846 '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
1847 '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
1849 my $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);
1851 # No enclosures is OK
1852 return if !defined $result;
1854 @output = @{ get_snmp_output($result, \%encl_oid) };
# omreport path: collect the enclosures of every controller and tag the
# collected entries with the controller id under the 'ctrl' key
1857 foreach my $c (@controllers) {
1858 push @output, @{ run_omreport("storage enclosure controller=$c") };
1859 map_item('ctrl', $c, \@output);
1874 foreach my $out (@output) {
1876 $id = $out->{'enclosureNumber'} - 1;
1877 $name = $out->{'enclosureName'};
1878 $state = $encl_state{$out->{'enclosureState'}};
1879 $status = $snmp_status{$out->{'enclosureComponentStatus'}};
# The firmware version column is optional; fall back to 'N/A'
1880 $firmware = exists $out->{enclosureFirmwareVersion}
1881 ? $out->{enclosureFirmwareVersion} : 'N/A';
1882 $nexus = convert_nexus($out->{enclosureNexusID});
# Keep only the leading number before the first colon
1884 $ctrl =~ s{\A (\d+):.* \z}{$1}xms;
1888 $name = $out->{Name};
1889 $state = $out->{State};
1890 $status = $out->{Status};
# omreport prints 'Not Applicable' when there is no firmware version
1891 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
1892 ? $out->{'Firmware Version'} : 'N/A';
1893 $nexus = join q{:}, $out->{ctrl}, $id;
1894 $ctrl = $out->{ctrl};
1897 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
1898 $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace
1900 # store enclosure data for future use
1901 push @enclosures, { 'id' => $id,
1902 'ctrl' => $out->{ctrl},
1905 # Collecting some storage info
1906 $sysinfo{'enclosure'}{$nexus}{'id'} = $nexus;
1907 $sysinfo{'enclosure'}{$nexus}{'name'} = $name;
1908 $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;
1910 next ENCLOSURE if blacklisted('encl', $nexus);
# The same message is used for every status; only the severity differs
1912 my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
1913 $nexus, $name, $ctrl, $state;
1914 report('storage', $msg, $status2nagios{$status}, $nexus);
1920 #-----------------------------------------
1921 # STORAGE: Check enclosure fans
1922 #-----------------------------------------
# Checks the fans of every storage enclosure and emits one
# report('storage', ...) line per fan.
#
# Data is taken either from the SNMP fan* columns or from
# "omreport storage enclosure controller=C enclosure=E info=fans" for
# each entry in @enclosures.  Nothing is done when @controllers is empty,
# when 'encl_fan' is blacklisted for 'all', or when the SNMP query
# returns no result.
1923 sub check_enclosure_fans {
1924 return if $#controllers == -1;
1925 return if blacklisted('encl_fan', 'all');
1933 my $encl_id = undef;
1934 my $encl_name = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
1940 '1.3.6.1.4.1.674.10893.1.20.130.7.1.1' => 'fanNumber',
1941 '1.3.6.1.4.1.674.10893.1.20.130.7.1.2' => 'fanName',
1942 '1.3.6.1.4.1.674.10893.1.20.130.7.1.4' => 'fanState',
1943 '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
1944 '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
1945 '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
1946 '1.3.6.1.4.1.674.10893.1.20.130.8.1.4' => 'fanConnectionEnclosureName',
1947 '1.3.6.1.4.1.674.10893.1.20.130.8.1.5' => 'fanConnectionEnclosureNumber',
1950 my $result = $snmp_session->get_entries(-columns => [keys %fan_oid]);
1952 # No enclosure fans is OK
1953 return if !defined $result;
1955 @output = @{ get_snmp_output($result, \%fan_oid) };
# omreport path: query each known enclosure and tag the collected entries
# with the controller id, enclosure id and enclosure name
1958 foreach my $enc (@enclosures) {
1959 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans") };
1960 map_item('ctrl', $enc->{ctrl}, \@output);
1961 map_item('encl_id', $enc->{id}, \@output);
1962 map_item('encl_name', $enc->{name}, \@output);
1977 # Check fans on each of the enclosures
1979 foreach my $out (@output) {
1981 $id = $out->{fanNumber} - 1;
1982 $name = $out->{fanName};
1983 $state = $fan_state{$out->{fanState}};
1984 $status = $snmp_status{$out->{fanComponentStatus}};
1985 $speed = $out->{fanProbeCurrValue};
1986 $encl_id = $out->{fanConnectionEnclosureNumber} - 1;
1987 $encl_name = $out->{fanConnectionEnclosureName};
1988 $nexus = convert_nexus($out->{fanNexusID});
1992 $name = $out->{'Name'};
1993 $state = $out->{'State'};
1994 $status = $out->{'Status'};
1995 $speed = $out->{'Speed'};
# With omreport the enclosure id is written as "ctrl:enclosure" and the
# fan nexus as "ctrl:enclosure:fan"
1996 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
1997 $encl_name = $out->{encl_name};
1998 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2001 next FAN if blacklisted('encl_fan', $nexus);
# A non-Ok status is reported with the severity mapped from it; otherwise
# the fan is reported as OK together with its speed
2004 if ($status ne 'Ok') {
2005 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2006 $name, $encl_id, $encl_name, $state;
2007 report('storage', $msg, $status2nagios{$status}, $nexus);
2011 my $msg = sprintf '%s in enclosure %s [%s] is %s (speed=%s)',
2012 $name, $encl_id, $encl_name, $state, $speed;
2013 report('storage', $msg, $E_OK, $nexus);
2020 #-----------------------------------------
2021 # STORAGE: Check enclosure power supplies
2022 #-----------------------------------------
# Checks the power supplies of every storage enclosure and emits one
# report('storage', ...) line per power supply.
#
# Data is taken either from the SNMP powerSupply* columns or from
# "omreport storage enclosure controller=C enclosure=E info=pwrsupplies"
# for each entry in @enclosures.  Nothing is done when @controllers is
# empty, when 'encl_ps' is blacklisted for 'all', or when the SNMP query
# returns no result.
2023 sub check_enclosure_pwr {
2024 return if $#controllers == -1;
2025 return if blacklisted('encl_ps', 'all');
2032 my $encl_id = undef;
2033 my $encl_name = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
2039 '1.3.6.1.4.1.674.10893.1.20.130.9.1.1' => 'powerSupplyNumber',
2040 '1.3.6.1.4.1.674.10893.1.20.130.9.1.2' => 'powerSupplyName',
2041 '1.3.6.1.4.1.674.10893.1.20.130.9.1.4' => 'powerSupplyState',
2042 '1.3.6.1.4.1.674.10893.1.20.130.9.1.9' => 'powerSupplyComponentStatus',
2043 '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
2044 '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
2045 '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
2047 my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
2049 # No enclosure power supplies is OK
2050 return if !defined $result;
2052 @output = @{ get_snmp_output($result, \%ps_oid) };
# omreport path: query each known enclosure and tag the collected entries
# with the controller id, enclosure id and enclosure name
2055 foreach my $enc (@enclosures) {
2056 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies") };
2057 map_item('ctrl', $enc->{ctrl}, \@output);
2058 map_item('encl_id', $enc->{id}, \@output);
2059 map_item('encl_name', $enc->{name}, \@output);
# Numeric power supply state code and its readable name
2068 5 => 'Not Installed',
2074 # Check power supplies on each of the enclosures
2076 foreach my $out (@output) {
# NOTE(review): the fan, temperature probe and EMM checks subtract 1 from
# their SNMP component number; this one uses powerSupplyNumber as-is --
# confirm that is intended
2078 $id = $out->{powerSupplyNumber};
2079 $name = $out->{powerSupplyName};
2080 $state = $ps_state{$out->{powerSupplyState}};
2081 $status = $snmp_status{$out->{powerSupplyComponentStatus}};
2082 $encl_id = $out->{powerSupplyConnectionEnclosureNumber} - 1;
2083 $encl_name = $out->{powerSupplyConnectionEnclosureName};
2084 $nexus = convert_nexus($out->{powerSupplyNexusID});
2088 $name = $out->{'Name'};
2089 $state = $out->{'State'};
2090 $status = $out->{'Status'};
# With omreport the enclosure id is written as "ctrl:enclosure" and the
# power supply nexus as "ctrl:enclosure:id"
2091 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2092 $encl_name = $out->{encl_name};
2093 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2096 next PS if blacklisted('encl_ps', $nexus);
# A non-Ok status is reported with the severity mapped from it; otherwise
# the power supply is reported as OK
2099 if ($status ne 'Ok') {
2100 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2101 $name, $encl_id, $encl_name, $state;
2102 report('storage', $msg, $status2nagios{$status}, $nexus);
2106 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2107 $name, $encl_id, $encl_name, $state;
2108 report('storage', $msg, $E_OK, $nexus);
2115 #-----------------------------------------
2116 # STORAGE: Check enclosure temperatures
2117 #-----------------------------------------
# Checks the temperature probes of every storage enclosure, emits one
# report('storage', ...) line per probe and, when $opt{perfdata} is
# defined, stores the reading and its thresholds in %perfdata.
#
# Data is taken either from the SNMP temperatureProbe* columns or from
# "omreport storage enclosure controller=C enclosure=E info=temps" for
# each entry in @enclosures.  Nothing is done when @controllers is empty,
# when 'encl_temp' is blacklisted for 'all', or when the SNMP query
# returns no result.
2118 sub check_enclosure_temp {
2119 return if $#controllers == -1;
2120 return if blacklisted('encl_temp', 'all');
2127 my $reading = undef;
2129 my $max_warn = undef;
2130 my $max_crit = undef;
2131 my $encl_id = undef;
2132 my $encl_name = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
2138 '1.3.6.1.4.1.674.10893.1.20.130.11.1.1' => 'temperatureProbeNumber',
2139 '1.3.6.1.4.1.674.10893.1.20.130.11.1.2' => 'temperatureProbeName',
2140 '1.3.6.1.4.1.674.10893.1.20.130.11.1.4' => 'temperatureProbeState',
2141 '1.3.6.1.4.1.674.10893.1.20.130.11.1.6' => 'temperatureProbeUnit',
2142 '1.3.6.1.4.1.674.10893.1.20.130.11.1.9' => 'temperatureProbeMaxWarning',
2143 '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
2144 '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
2145 '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
2146 '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
2147 '1.3.6.1.4.1.674.10893.1.20.130.12.1.4' => 'temperatureConnectionEnclosureName',
2148 '1.3.6.1.4.1.674.10893.1.20.130.12.1.5' => 'temperatureConnectionEnclosureNumber',
2150 my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);
2152 # No enclosure temperature probes is OK
2153 return if !defined $result;
2155 @output = @{ get_snmp_output($result, \%temp_oid) };
# omreport path: query each known enclosure and tag the collected entries
# with the controller id, enclosure id and enclosure name
2158 foreach my $enc (@enclosures) {
2159 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
2160 map_item('ctrl', $enc->{ctrl}, \@output);
2161 map_item('encl_id', $enc->{id}, \@output);
2162 map_item('encl_name', $enc->{name}, \@output);
2177 # Check temperature probes on each of the enclosures
2179 foreach my $out (@output) {
2181 $id = $out->{temperatureProbeNumber} - 1;
2182 $name = $out->{temperatureProbeName};
2183 $state = $temp_state{$out->{temperatureProbeState}};
2184 $status = $snmp_status{$out->{temperatureProbeComponentStatus}};
2185 $unit = $out->{temperatureProbeUnit};
2186 $reading = $out->{temperatureProbeCurValue};
2187 $max_warn = $out->{temperatureProbeMaxWarning};
2188 $max_crit = $out->{temperatureProbeMaxCritical};
2189 $encl_id = $out->{temperatureConnectionEnclosureNumber} - 1;
2190 $encl_name = $out->{temperatureConnectionEnclosureName};
2191 $nexus = convert_nexus($out->{temperatureProbeNexusID});
2195 $name = $out->{'Name'};
2196 $state = $out->{'State'};
2197 $status = $out->{'Status'};
# omreport values carry a "C" unit; the first "C" (with any whitespace
# before it) is stripped so that only the number remains
2199 $reading = $out->{'Reading'}; $reading =~ s{\s*C}{}xms;
2200 $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms;
2201 $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms;
2202 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2203 $encl_name = $out->{encl_name};
2204 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2207 next TEMP if blacklisted('encl_temp', $nexus);
# NOTE(review): with these arguments the non-Ok message reads
# "... is <state> C at <reading> (<max> max)", i.e. the " C" unit follows
# the state rather than the reading -- confirm the intended wording
2210 if ($status ne 'Ok') {
2211 my $msg = sprintf '%s in enclosure %s [%s] is %s C at %s (%s max)',
2212 $name, $encl_id, $encl_name, $state, $reading, $max_crit;
2213 report('storage', $msg, $status2nagios{$status}, $nexus);
2217 my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)',
2218 $name, $encl_id, $encl_name, $reading, $max_crit;
2219 report('storage', $msg, $E_OK, $nexus);
2222 # Collect performance data
2223 if (defined $opt{perfdata}) {
# A name of the exact form "Temperature Probe N" is shortened to "temp_N";
# the perfdata key is "enclosure_<encl_id>_<name>" and the value is
# "<reading>C;<max_warn>;<max_crit>"
2224 $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
2225 my $pkey = "enclosure_${encl_id}_${name}";
2226 my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
2227 $perfdata{$pkey} = $pval;
2234 #-----------------------------------------
2235 # STORAGE: Check enclosure management modules (EMM)
2236 #-----------------------------------------
# Checks the enclosure management modules (EMMs) of every storage
# enclosure and emits one report('storage', ...) line per module.
#
# Data is taken either from the SNMP enclosureManagementModule* columns
# or from "omreport storage enclosure controller=C enclosure=E info=emms"
# for each entry in @enclosures.  Nothing is done when @controllers is
# empty, when 'encl_emm' is blacklisted for 'all', or when the SNMP query
# returns no result.
2237 sub check_enclosure_emms {
2238 return if $#controllers == -1;
2239 return if blacklisted('encl_emm', 'all');
2246 my $encl_id = undef;
2247 my $encl_name = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
2253 '1.3.6.1.4.1.674.10893.1.20.130.13.1.1' => 'enclosureManagementModuleNumber',
2254 '1.3.6.1.4.1.674.10893.1.20.130.13.1.2' => 'enclosureManagementModuleName',
2255 '1.3.6.1.4.1.674.10893.1.20.130.13.1.4' => 'enclosureManagementModuleState',
2256 '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
2257 '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
2258 '1.3.6.1.4.1.674.10893.1.20.130.14.1.4' => 'enclosureManagementModuleConnectionEnclosureName',
2259 '1.3.6.1.4.1.674.10893.1.20.130.14.1.5' => 'enclosureManagementModuleConnectionEnclosureNumber',
2261 my $result = $snmp_session->get_entries(-columns => [keys %emms_oid]);
2263 # No enclosure EMMs is OK
2264 return if !defined $result;
2266 @output = @{ get_snmp_output($result, \%emms_oid) };
# omreport path: query each known enclosure and tag the collected entries
# with the controller id, enclosure id and enclosure name
2269 foreach my $enc (@enclosures) {
2270 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms") };
2271 map_item('ctrl', $enc->{ctrl}, \@output);
2272 map_item('encl_id', $enc->{id}, \@output);
2273 map_item('encl_name', $enc->{name}, \@output);
# Numeric EMM state code and its readable name
2284 5 => 'Not Installed',
2289 # Check EMMs on each of the enclosures
2291 foreach my $out (@output) {
2293 $id = $out->{enclosureManagementModuleNumber} - 1;
2294 $name = $out->{enclosureManagementModuleName};
2295 $state = $emms_state{$out->{enclosureManagementModuleState}};
2296 $status = $snmp_status{$out->{enclosureManagementModuleComponentStatus}};
2297 $encl_id = $out->{enclosureManagementModuleConnectionEnclosureNumber} - 1;
2298 $encl_name = $out->{enclosureManagementModuleConnectionEnclosureName};
2299 $nexus = convert_nexus($out->{enclosureManagementModuleNexusID});
2303 $name = $out->{'Name'};
2304 $state = $out->{'State'};
2305 $status = $out->{'Status'};
# With omreport the enclosure id is written as "ctrl:enclosure" and the
# EMM nexus as "ctrl:enclosure:id"
2306 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2307 $encl_name = $out->{encl_name};
2308 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2311 next EMM if blacklisted('encl_emm', $nexus);
# A non-Ok status is reported with the severity mapped from it; otherwise
# the module is reported as OK
2314 if ($status ne 'Ok') {
2315 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2316 $name, $encl_id, $encl_name, $state;
2317 report('storage', $msg, $status2nagios{$status}, $nexus);
2321 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2322 $name, $encl_id, $encl_name, $state;
2323 report('storage', $msg, $E_OK, $nexus);
2330 #-----------------------------------------
2331 # CHASSIS: Check memory modules
2332 #-----------------------------------------
# Checks every memory module and emits one report('chassis', ...) line
# per occupied slot.  Data is taken either from the SNMP memoryDevice*
# columns or from "omreport <chassis option> memory".  For a module that
# is not Ok the message lists its failure descriptions when any were
# found, otherwise just its status.
2334 return if blacklisted('dimm', 'all');
2338 my $location = undef;
# SNMP column OIDs mapped to the key names used when reading $out below
2347 '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1' => 'memoryDeviceIndex',
2348 '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1' => 'memoryDeviceStatus',
2349 '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1' => 'memoryDeviceLocationName',
2350 '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
2351 '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
2353 my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);
# For memory a failed SNMP query is an error (printed below), not an
# "absent component" case
2355 if (!defined $result) {
2356 printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
2357 $snmp_session->close;
2361 @output = @{ get_snmp_output($result, \%dimm_oid) };
2364 @output = @{ run_omreport("$omopt_chassis memory") };
2367 # Note: These values are bit masks, so combination values are
2368 # possible. If value is 0 (zero), memory device has no faults.
2371 1 => 'ECC single bit correction warning rate exceeded',
2372 2 => 'ECC single bit correction failure rate exceeded',
2373 4 => 'ECC multibit fault encountered',
2374 8 => 'ECC single bit correction logging disabled',
2375 16 => 'device disabled because of spare activation',
2379 foreach my $out (@output) {
2380 @failures = (); # Initialize
2382 $index = $out->{memoryDeviceIndex};
2383 $status = $snmp_status{$out->{memoryDeviceStatus}};
2384 $location = $out->{memoryDeviceLocationName};
# The reported size is divided by 1024 and shown as whole MB
2385 $size = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
2386 $modes = $out->{memoryDeviceFailureModes};
# Collect the description of every failure bit that is set.
# NOTE(review): a plain "sort" compares keys as strings, so mask 16 is
# visited before 2; this only affects the order of the listed failures
2388 foreach my $mask (sort keys %failure_mode) {
2389 if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; }
# omreport marks an empty slot with the type '[Not Occupied]'; its index
# is set to undef so that the slot is skipped further down
2394 $index = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
2395 $status = $out->{'Status'};
2396 $location = $out->{'Connector Name'};
2397 $size = $out->{'Size'};
2398 if (defined $size) {
# Collapse each pair of whitespace characters into a single space
2399 $size =~ s{\s\s}{ }gxms;
2401 # Run 'omreport chassis memory index=X' to get the failures
2402 if ($status ne 'Ok' && defined $index) {
2403 foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
2404 if (m/\A Failures; (.+?) \z/xms) {
# Drop the last character of the captured text (presumably a trailing
# period -- TODO confirm) and split the rest on '.' into single failures
2405 chop(my $fail = $1);
2406 push @failures, split m{\.}xms, $fail;
# Trim leading and trailing whitespace from the location
2411 $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;
# NOTE(review): $index may still be undef here (empty slot), and it is
# passed to blacklisted() before the undef test below
2413 next DIMM if blacklisted('dimm', $index);
2415 # Ignore empty memory slots
2416 next DIMM if !defined $index;
2419 if ($status ne 'Ok') {
# Without failure details fall back to showing the status itself
2421 if (scalar @failures == 0) {
2422 $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
2423 $index, $location, $size, $status;
2426 $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
2427 $index, $location, $size, (join q{, }, @failures);
2430 report('chassis', $msg, $status2nagios{$status}, $index);
2434 my $msg = sprintf 'Memory module %d [%s, %s] is %s',
2435 $index, $location, $size, $status;
2436 report('chassis', $msg, $E_OK, $index);
2443 #-----------------------------------------
2444 # CHASSIS: Check fans
2445 #-----------------------------------------
# Reports one line per chassis fan through report('chassis', ...) and, when
# $opt{perfdata} is defined, stores the fan reading together with the upper
# warning and critical thresholds in %perfdata.
# NOTE(review): this listing omits some lines of the routine (among them
# the "sub" line and the branch openers); the comments below only describe
# the statements that are visible.
# Nothing to do when every fan is blacklisted
2447 return if blacklisted('fan', 'all');
2451 my $reading = undef;
2452 my $location = undef;
2453 my $max_crit = undef;
2454 my $max_warn = undef;
# SNMP column OIDs mapped to the field names read in the loop below
2460 '1.3.6.1.4.1.674.10892.1.700.12.1.2.1' => 'coolingDeviceIndex',
2461 '1.3.6.1.4.1.674.10892.1.700.12.1.5.1' => 'coolingDeviceStatus',
2462 '1.3.6.1.4.1.674.10892.1.700.12.1.6.1' => 'coolingDeviceReading',
2463 '1.3.6.1.4.1.674.10892.1.700.12.1.8.1' => 'coolingDeviceLocationName',
2464 '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
2465 '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
2467 my $result = $snmp_session->get_entries(-columns => [keys %cool_oid]);
# A missing SNMP result is treated differently on blades than on other
# systems (the blade branch body is not visible in this listing)
2469 if ($blade && !defined $result) {
# On non-blades a missing result is an SNMP error: print it and close the session
2472 elsif (!$blade && !defined $result) {
2473 printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
2474 $snmp_session->close;
2478 @output = @{ get_snmp_output($result, \%cool_oid) };
# Alternative data source: output of run_omreport()
2481 @output = @{ run_omreport("$omopt_chassis fans") };
2485 foreach my $out (@output) {
# Fields as delivered via SNMP; thresholds that are not present become 0
2487 $index = $out->{coolingDeviceIndex};
2488 $status = $snmp_probestatus{$out->{coolingDeviceStatus}};
2489 $reading = $out->{coolingDeviceReading};
2490 $location = $out->{coolingDeviceLocationName};
2491 $max_crit = exists $out->{coolingDeviceUpperCriticalThreshold}
2492 ? $out->{coolingDeviceUpperCriticalThreshold} : 0;
2493 $max_warn = exists $out->{coolingDeviceUpperNonCriticalThreshold}
2494 ? $out->{coolingDeviceUpperNonCriticalThreshold} : 0;
# Fields as delivered by omreport; '[N/A]' thresholds become 0
2497 $index = $out->{'Index'};
2498 $status = $out->{'Status'};
2499 $reading = $out->{'Reading'};
2500 $location = $out->{'Probe Name'};
2501 $max_crit = $out->{'Maximum Failure Threshold'} ne '[N/A]'
2502 ? $out->{'Maximum Failure Threshold'} : 0;
2503 $max_warn = $out->{'Maximum Warning Threshold'} ne '[N/A]'
2504 ? $out->{'Maximum Warning Threshold'} : 0;
# Keep only the leading digits of each value, dropping whatever follows them
2505 $reading =~ s{\A (\d+).* \z}{$1}xms;
2506 $max_warn =~ s{\A (\d+).* \z}{$1}xms;
2507 $max_crit =~ s{\A (\d+).* \z}{$1}xms;
2510 next FAN if blacklisted('fan', $index);
# Anything other than 'Ok' is reported with the Nagios level mapped from
# the status; SNMP and omreport use different mapping tables
2513 if ($status ne 'Ok') {
2514 my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
2515 $index, $location, $status;
2516 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2517 report('chassis', $msg, $err, $index);
# Healthy fan: report the reading with level OK
2520 my $msg = sprintf 'Chassis fan %d [%s]: %s',
2521 $index, $location, $reading;
2522 report('chassis', $msg, $E_OK, $index);
2525 # Collect performance data
2526 if (defined $opt{perfdata}) {
# Build the perfdata key from the lower-cased location: whitespace becomes
# '_' and the first 'proc_' becomes 'cpu#'
2527 my $pname = lc $location;
2528 $pname =~ s{\s}{_}gxms;
2529 $pname =~ s{proc_}{cpu#}xms;
2530 my $pkey = join q{_}, 'fan', $index, $pname;
# Value format: <reading>RPM;<warn>;<crit>
2531 my $pval = join q{;}, "${reading}RPM", $max_warn, $max_crit;
2532 $perfdata{$pkey} = $pval;
2539 #-----------------------------------------
2540 # CHASSIS: Check power supplies
2541 #-----------------------------------------
# Reports one line per power supply through report('chassis', ...), using
# either SNMP data or the output of run_omreport().
# NOTE(review): some lines of this routine (declarations, branch openers,
# parts of the lookup tables) are missing from this listing.
2542 sub check_powersupplies {
# Nothing to do when every power supply is blacklisted
2543 return if blacklisted('ps', 'all');
2548 my $err_type = undef;
# SNMP column OIDs mapped to the field names read in the loop below
2556 '1.3.6.1.4.1.674.10892.1.600.12.1.2.1' => 'powerSupplyIndex',
2557 '1.3.6.1.4.1.674.10892.1.600.12.1.5.1' => 'powerSupplyStatus',
2558 '1.3.6.1.4.1.674.10892.1.600.12.1.7.1' => 'powerSupplyType',
2559 '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
2560 '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
2562 my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
2564 # No instrumented PSU is OK (blades, low-end servers)
2565 return 0 if !defined $result;
2567 @output = @{ get_snmp_output($result, \%ps_oid) };
# Alternative data source: output of run_omreport()
2570 @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
# Lookup tables translating SNMP numbers to text (only partly visible here).
# The sensor state keys are bit masks that can be combined.
2580 6 => 'Uninterruptible Power Supply',
2590 1 => 'Presence detected',
2591 2 => 'Failure detected',
2592 4 => 'Predictive Failure',
2594 16 => 'AC lost or out-of-range',
2595 32 => 'AC out-of-range but present',
2596 64 => 'Configuration error',
2599 my %ps_config_error_type
2601 1 => 'Vendor mismatch',
2602 2 => 'Revision mismatch',
2603 3 => 'Processor missing',
2607 foreach my $out (@output) {
2609 @states = (); # contains states for the PS
# SNMP fields; the SNMP index is shifted down by one
2611 $index = $out->{powerSupplyIndex} - 1;
2612 $status = $snmp_status{$out->{powerSupplyStatus}};
2613 $type = $ps_type{$out->{powerSupplyType}};
2614 $err_type = defined $out->{powerSupplyConfigurationErrorType}
2615 ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;
2617 # get the combined state from the StatusReading OID
# NOTE(review): "sort keys" compares the masks as strings, so the states
# are listed in the order 1, 16, 2, 32, 4, 64 rather than numerically
2618 foreach my $mask (sort keys %ps_state) {
2619 if (($out->{powerSupplySensorState} & $mask) != 0) {
2620 push @states, $ps_state{$mask};
2624 # If configuration error, also include the error type
2625 if (defined $err_type) {
2626 push @states, $err_type;
2629 # Finally, construct the state string
2630 $state = join q{, }, @states;
# omreport fields are used as delivered
2633 $index = $out->{'Index'};
2634 $status = $out->{'Status'};
2635 $type = $out->{'Type'};
2636 $state = $out->{'Online Status'};
2639 next PS if blacklisted('ps', $index);
# Anything other than 'Ok' is reported with the level from %status2nagios
2642 if ($status ne 'Ok') {
2643 my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
2644 $index, $type, $state;
2645 report('chassis', $msg, $status2nagios{$status}, $index);
# Healthy power supply: report type and state with level OK
2648 my $msg = sprintf 'Power Supply %d [%s]: %s',
2649 $index, $type, $state;
2650 report('chassis', $msg, $E_OK, $index);
#-----------------------------------------
# CHASSIS: Check temperatures
#-----------------------------------------
# Reports every chassis temperature probe through report('chassis', ...)
# and, when $opt{perfdata} is defined, stores the reading and the upper
# thresholds of each non-discrete probe in %perfdata.
#
# Probe data is read via SNMP when $snmp is set and from
# run_omreport("$omopt_chassis temps") otherwise. Custom thresholds returned
# by custom_temperature_thresholds() are evaluated before the thresholds
# reported by the hardware.
#
# A threshold that the hardware does not provide is represented by the
# string '[N/A]' in both modes. Such a threshold is never compared
# numerically and is written as an empty field in the performance data.
#
# Exits with $E_UNKNOWN if the SNMP table cannot be fetched.
sub check_temperatures {
    return if blacklisted('temp', 'all');

    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my $max_crit = undef;
    my $max_warn = undef;
    my $min_warn = undef;
    my $min_crit = undef;
    my $type     = undef;
    my $discrete = undef;
    my @output   = ();

    # Getting custom temperature thresholds (user option)
    my %warn_threshold = %{ custom_temperature_thresholds('w') };
    my %crit_threshold = %{ custom_temperature_thresholds('c') };

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.20.1.2.1'  => 'temperatureProbeIndex',
             '1.3.6.1.4.1.674.10892.1.700.20.1.5.1'  => 'temperatureProbeStatus',
             '1.3.6.1.4.1.674.10892.1.700.20.1.6.1'  => 'temperatureProbeReading',
             '1.3.6.1.4.1.674.10892.1.700.20.1.7.1'  => 'temperatureProbeType',
             '1.3.6.1.4.1.674.10892.1.700.20.1.8.1'  => 'temperatureProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
            );

        # this didn't work well for some reason
        #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # Getting values using the table
        my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
        my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis temps") };
    }

    my %probe_type
      = (
         1  => 'Other',      # type is other than following values
         2  => 'Unknown',    # type is unknown
         3  => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
         16 => 'Discrete',   # type is temperature probe with discrete reading
        );

    # SNMP delivers temperatures in tenths of a degree. Convert a field to
    # degrees, or return '[N/A]' when the agent did not supply it, so that
    # all four thresholds are treated alike.
    my $tenths_or_na = sub {
        my ($row, $field) = @_;
        return '[N/A]' if !defined $row->{$field};
        return $row->{$field} / 10;
    };

  TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{temperatureProbeIndex} - 1;
            $status   = $snmp_probestatus{$out->{temperatureProbeStatus}};
            # Discrete probes may come without a numeric reading
            $reading  = defined $out->{temperatureProbeReading}
              ? $out->{temperatureProbeReading} / 10 : undef;
            $location = $out->{temperatureProbeLocationName};
            $max_crit = $tenths_or_na->($out, 'temperatureProbeUpperCriticalThreshold');
            $max_warn = $tenths_or_na->($out, 'temperatureProbeUpperNonCriticalThreshold');
            $min_crit = $tenths_or_na->($out, 'temperatureProbeLowerCriticalThreshold');
            $min_warn = $tenths_or_na->($out, 'temperatureProbeLowerNonCriticalThreshold');
            $type     = $probe_type{$out->{temperatureProbeType}};
            $discrete = exists $out->{temperatureProbeDiscreteReading}
              ? $out->{temperatureProbeDiscreteReading} : undef;
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
            $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
            $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
            $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;
            # A reading that is not a plain integer is taken as discrete
            $type     = $reading =~ m{\A\d+\z}xms ? 'AmbientESM' : 'Discrete';
            $discrete = $reading;
        }

        next TEMP if blacklisted('temp', $index);

        # Nagios level derived from the probe status; SNMP and omreport
        # use different mapping tables
        my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};

        # An SNMP probe type missing from %probe_type leaves $type undefined
        if (defined $type and $type eq 'Discrete') {
            my $msg = sprintf 'Temperature probe %d (%s): is %s',
              $index, $location, $discrete;
            report('chassis', $msg, $err, $index);
        }
        else {
            # First check according to custom thresholds
            if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
                # Custom critical MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{max};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
                # Custom warning MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{max};
                report('chassis', $msg, $E_WARNING, $index);
            }
            elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
                # Custom critical MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{min};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
                # Custom warning MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{min};
                report('chassis', $msg, $E_WARNING, $index);
            }
            elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
                  $index, $location, $reading;
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
                  $index, $location, $reading;
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
                  $index, $location, $reading;
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
                  $index, $location, $reading;
                report('chassis', $msg, $err, $index);
            }
            else {
                # No threshold violated: report the reading and the limits
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
                  $index, $location, $reading;
                if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
                    $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
                }
                else {
                    $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
                      $min_warn, $min_crit, $max_warn, $max_crit;
                }
                report('chassis', $msg, $err, $index);
            }

            # Collect performance data
            if (defined $opt{perfdata}) {
                my $pname = lc $location;
                $pname =~ s{\s}{_}gxms;
                $pname =~ s{_temp\z}{}xms;
                $pname =~ s{proc_}{cpu#}xms;
                my $pkey = join q{_}, 'temp', $index, $pname;
                # A missing threshold must be an empty perfdata field,
                # not the literal text '[N/A]'
                my $pwarn = $max_warn eq '[N/A]' ? q{} : $max_warn;
                my $pcrit = $max_crit eq '[N/A]' ? q{} : $max_crit;
                my $pval = join q{;}, "${reading}C", $pwarn, $pcrit;
                $perfdata{$pkey} = $pval;
            }
        }
    }
    return;
}
2840 #-----------------------------------------
2841 # CHASSIS: Check processors
2842 #-----------------------------------------
# Reports one line per processor through report('chassis', ...), using
# either SNMP data or the output of run_omreport(). Unoccupied CPU slots
# are skipped.
# NOTE(review): some lines of this routine (declarations, branch openers,
# parts of the lookup tables) are missing from this listing.
2843 sub check_processors {
# Nothing to do when every CPU is blacklisted
2844 return if blacklisted('cpu', 'all');
2857 # NOTE: For some reason, older models don't have the
2858 # "Processor Device Status" OIDs. We check both the newer
2859 # (preferred) OIDs and the old ones.
2863 '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
2864 '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
2865 '1.3.6.1.4.1.674.10892.1.1100.30.1.8.1' => 'processorDeviceManufacturerName',
2866 '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
2867 '1.3.6.1.4.1.674.10892.1.1100.30.1.10.1' => 'processorDeviceFamily',
2868 '1.3.6.1.4.1.674.10892.1.1100.30.1.12.1' => 'processorDeviceCurrentSpeed',
2869 '1.3.6.1.4.1.674.10892.1.1100.30.1.23.1' => 'processorDeviceBrandName',
2870 '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
2871 '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
2872 '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
2875 my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid]);
# A missing SNMP result is an error: print it and close the session
2877 if (!defined $result) {
2878 printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
2879 $snmp_session->close;
2883 @output = @{ get_snmp_output($result, \%cpu_oid) };
# Alternative data source: output of run_omreport()
2886 @output = @{ run_omreport("$omopt_chassis processors") };
# Text for the numeric processor state
2891 1 => 'Other', # other than following values
2892 2 => 'Unknown', # unknown
2893 3 => 'Enabled', # enabled
2894 4 => 'User Disabled', # disabled by user via BIOS setup
2895 5 => 'BIOS Disabled', # disabled by BIOS (POST error)
# Text for each bit of the combined status reading
2901 1 => 'Internal Error', # Internal Error
2902 2 => 'Thermal Trip', # Thermal Trip
2903 32 => 'Configuration Error', # Configuration Error
2904 128 => 'Present', # Processor Present
2905 256 => 'Disabled', # Processor Disabled
2906 512 => 'Terminator Present', # Terminator Present
2907 1024 => 'Throttled', # Processor Throttled
2910 # Mapping between family numbers from SNMP and actual CPU family
2913 1 => 'Other', 2 => 'Unknown', 3 => '8086',
2914 4 => '80286', 5 => '386', 6 => '486',
2915 7 => '8087', 8 => '80287', 9 => '80387',
2916 10 => '80487', 11 => 'Pentium', 12 => 'Pentium Pro',
2917 13 => 'Pentium II', 14 => 'Pentium with MMX', 15 => 'Celeron',
2918 16 => 'Pentium II Xeon', 17 => 'Pentium III', 18 => 'Pentium III Xeon',
2919 19 => 'Pentium III', 20 => 'Itanium', 21 => 'Xeon',
2920 22 => 'Pentium 4', 23 => 'Xeon MP', 24 => 'Itanium 2',
2921 25 => 'K5', 26 => 'K6', 27 => 'K6-2',
2922 28 => 'K6-3', 29 => 'Athlon', 30 => 'AMD2900',
2923 31 => 'K6-2+', 32 => 'Power PC', 33 => 'Power PC 601',
2924 34 => 'Power PC 603', 35 => 'Power PC 603+', 36 => 'Power PC 604',
2925 37 => 'Power PC 620', 38 => 'Power PC x704', 39 => 'Power PC 750',
2926 48 => 'Alpha', 49 => 'Alpha 21064', 50 => 'Alpha 21066',
2927 51 => 'Alpha 21164', 52 => 'Alpha 21164PC', 53 => 'Alpha 21164a',
2928 54 => 'Alpha 21264', 55 => 'Alpha 21364', 64 => 'MIPS',
2929 65 => 'MIPS R4000', 66 => 'MIPS R4200', 67 => 'MIPS R4400',
2930 68 => 'MIPS R4600', 69 => 'MIPS R10000', 80 => 'SPARC',
2931 81 => 'SuperSPARC', 82 => 'microSPARC II', 83 => 'microSPARC IIep',
2932 84 => 'UltraSPARC', 85 => 'UltraSPARC II', 86 => 'UltraSPARC IIi',
2933 87 => 'UltraSPARC III', 88 => 'UltraSPARC IIIi', 96 => '68040',
2934 97 => '68xxx', 98 => '68000', 99 => '68010',
2935 100 => '68020', 101 => '68030', 112 => 'Hobbit',
2936 120 => 'Crusoe TM5000', 121 => 'Crusoe TM3000', 122 => 'Efficeon TM8000',
2937 128 => 'Weitek', 131 => 'Athlon 64', 132 => 'Opteron',
2938 133 => 'Sempron', 134 => 'Turion 64 Mobile', 135 => 'Dual-Core Opteron',
2939 136 => 'Athlon 64 X2 DC', 137 => 'Turion 64 X2 M', 138 => 'Quad-Core Opteron',
2940 139 => '3rd gen Opteron', 144 => 'PA-RISC', 145 => 'PA-RISC 8500',
2941 146 => 'PA-RISC 8000', 147 => 'PA-RISC 7300LC', 148 => 'PA-RISC 7200',
2942 149 => 'PA-RISC 7100LC', 150 => 'PA-RISC 7100', 160 => 'V30',
2943 171 => 'Dual-Core Xeon 5200', 172 => 'Dual-Core Xeon 7200', 173 => 'Quad-Core Xeon 7300',
2944 174 => 'Quad-Core Xeon 7400', 175 => 'Multi-Core Xeon 7400', 176 => 'M1',
2945 177 => 'M2', 180 => 'AS400', 182 => 'Athlon XP',
2946 183 => 'Athlon MP', 184 => 'Duron', 185 => 'Pentium M',
2947 186 => 'Celeron D', 187 => 'Pentium D', 188 => 'Pentium Extreme',
2948 189 => 'Core Solo', 190 => 'Core2', 191 => 'Core2 Duo',
2949 198 => 'Core i7', 199 => 'Dual-Core Celeron', 200 => 'IBM390',
2950 201 => 'G4', 202 => 'G5', 203 => 'ESA/390 G6',
2951 204 => 'z/Architectur', 210 => 'C7-M', 211 => 'C7-D',
2952 212 => 'C7', 213 => 'Eden', 214 => 'Multi-Core Xeon',
2953 215 => 'Dual-Core Xeon 3xxx', 216 => 'Quad-Core Xeon 3xxx', 218 => 'Dual-Core Xeon 5xxx',
2954 219 => 'Quad-Core Xeon 5xxx', 221 => 'Dual-Core Xeon 7xxx', 222 => 'Quad-Core Xeon 7xxx',
2955 223 => 'Multi-Core Xeon 7xxx', 250 => 'i860', 251 => 'i960',
2959 foreach my $out (@output) {
# SNMP: prefer the "Device Status" columns and fall back to the older
# ones; the SNMP index is shifted down by one
2961 $index = exists $out->{processorDeviceStatusIndex}
2962 ? $out->{processorDeviceStatusIndex} - 1
2963 : $out->{processorDeviceIndex} - 1;
2964 $status = exists $out->{processorDeviceStatusStatus}
2965 ? $snmp_status{$out->{processorDeviceStatusStatus}}
2966 : $snmp_status{$out->{processorDeviceStatus}};
2967 if (exists $out->{processorDeviceStatusReading}) {
2968 my @states = (); # contains states for the CPU
2970 # get the combined state from the StatusReading OID
# NOTE(review): "sort keys" compares the masks as strings, so the states
# are listed in the order 1, 1024, 128, 2, 256, 32, 512 rather than numerically
2971 foreach my $mask (sort keys %cpu_reading) {
2972 if (($out->{processorDeviceStatusReading} & $mask) != 0) {
2973 push @states, $cpu_reading{$mask};
2977 # Finally, create the state string
2978 $state = join q{, }, @states;
# Without a status reading, the plain state number is translated instead
2981 $state = $cpu_state{$out->{processorDeviceStatusState}};
2983 $man = $out->{processorDeviceManufacturerName};
# The family is only set when the number is known to %cpu_family
2984 $family = (exists $out->{processorDeviceFamily}
2985 and exists $cpu_family{$out->{processorDeviceFamily}})
2986 ? $cpu_family{$out->{processorDeviceFamily}} : undef;
2987 $speed = $out->{processorDeviceCurrentSpeed};
2988 $brand = $out->{processorDeviceBrandName};
# omreport fields; optional ones are undef when absent
2991 $index = $out->{'Index'};
2992 $status = $out->{'Status'};
2993 $state = $out->{'State'};
2994 $brand = exists $out->{'Processor Brand'} ? $out->{'Processor Brand'} : undef;
2995 $family = exists $out->{'Processor Family'} ? $out->{'Processor Family'} : undef;
2996 $man = exists $out->{'Processor Manufacturer'} ? $out->{'Processor Manufacturer'} : undef;
2997 $speed = exists $out->{'Current Speed'} ? $out->{'Current Speed'} : undef;
3000 next CPU if blacklisted('cpu', $index);
3002 # Ignore unoccupied CPU slots (omreport)
3003 next CPU if (defined $out->{'Processor Manufacturer'}
3004 and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
3005 or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');
3007 # Ignore unoccupied CPU slots (snmp)
3008 if ($snmp and exists $out->{processorDeviceStatusReading}
3009 and $out->{processorDeviceStatusReading} == 0) {
# Tidy up the brand string: collapse repeated whitespace, drop "(R)" and
# "(tm)", drop the first " CPU" or " Processor", drop the first " @"
3015 if (defined $brand) {
3016 $brand =~ s{\s\s+}{ }gxms;
3017 $brand =~ s{\((R|tm)\)}{}gxms;
3018 $brand =~ s{\s(CPU|Processor)}{}xms;
3019 $brand =~ s{\s\@}{}xms;
# No brand string: compose one from manufacturer, family and speed, where
# the leading digits of the speed are divided by 1000 and labelled GHz
3021 elsif (defined $family and defined $man and defined $speed) {
3022 $speed =~ s{\A (\d+) .*}{$1}xms;
3023 $brand = sprintf '%s %s %.2fGHz', $man, $family, $speed / 1000;
# Anything other than 'Ok' is reported with the level from %status2nagios
3030 if ($status ne 'Ok') {
3031 my $msg = sprintf 'Processor %d [%s] needs attention: %s',
3032 $index, $brand, $state;
3033 report('chassis', $msg, $status2nagios{$status}, $index);
# Healthy processor: report brand and state with level OK
3037 my $msg = sprintf 'Processor %d [%s] is %s',
3038 $index, $brand, $state;
3039 report('chassis', $msg, $E_OK, $index);
3046 #-----------------------------------------
3047 # CHASSIS: Check voltage probes
3048 #-----------------------------------------
# Reports one line per voltage probe through report('chassis', ...), using
# either SNMP data or the output of run_omreport().
# NOTE(review): some lines of this routine (among them the "sub" line and
# the contents of %volt_discrete_reading) are missing from this listing.
# Nothing to do when every voltage probe is blacklisted
3050 return if blacklisted('volt', 'all');
3054 my $reading = undef;
3055 my $location = undef;
# SNMP column OIDs mapped to the field names read in the loop below
3061 '1.3.6.1.4.1.674.10892.1.600.20.1.2.1' => 'voltageProbeIndex',
3062 '1.3.6.1.4.1.674.10892.1.600.20.1.5.1' => 'voltageProbeStatus',
3063 '1.3.6.1.4.1.674.10892.1.600.20.1.6.1' => 'voltageProbeReading',
3064 '1.3.6.1.4.1.674.10892.1.600.20.1.8.1' => 'voltageProbeLocationName',
3065 '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
# The whole table below this base OID is fetched, not just the columns above
3068 my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
3069 my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);
# A missing SNMP result is an error: print it and close the session
3071 if (!defined $result) {
3072 printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
3073 $snmp_session->close;
3077 @output = @{ get_snmp_output($result, \%volt_oid) };
# Alternative data source: output of run_omreport()
3080 @output = @{ run_omreport("$omopt_chassis volts") };
3083 my %volt_discrete_reading
3090 foreach my $out (@output) {
# SNMP fields; the SNMP index is shifted down by one. A numeric reading is
# divided by 1000 and printed with three decimals and a " V" suffix;
# otherwise the discrete reading is translated to text
3092 $index = $out->{voltageProbeIndex} - 1;
3093 $status = $snmp_probestatus{$out->{voltageProbeStatus}};
3094 $reading = exists $out->{voltageProbeReading}
3095 ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
3096 : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
3097 $location = $out->{voltageProbeLocationName};
# omreport fields are used as delivered
3100 $index = $out->{'Index'};
3101 $status = $out->{'Status'};
3102 $reading = $out->{'Reading'};
3103 $location = $out->{'Probe Name'};
3106 next VOLT if blacklisted('volt', $index);
# Every probe is reported; the Nagios level is mapped from the status,
# with different mapping tables for SNMP and omreport
3109 my $msg = sprintf 'Voltage sensor %d [%s] is %s',
3110 $index, $location, $reading;
3111 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3112 report('chassis', $msg, $err, $index);
3118 #-----------------------------------------
3119 # CHASSIS: Check batteries
3120 #-----------------------------------------
# Reports one line per battery probe through report('chassis', ...), using
# either SNMP data or the output of run_omreport().
# NOTE(review): some lines of this routine (declarations, branch openers,
# part of %bat_reading) are missing from this listing.
3121 sub check_batteries {
# Nothing to do when every battery probe is blacklisted
3122 return if blacklisted('bp', 'all');
3126 my $reading = undef;
3127 my $location = undef;
# SNMP column OIDs mapped to the field names read in the loop below
3133 '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
3134 '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
3135 '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
3136 '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
3138 my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
3140 # No batteries is OK
3141 return 0 if !defined $result;
3143 @output = @{ get_snmp_output($result, \%bat_oid) };
# Alternative data source: output of run_omreport()
3146 @output = @{ run_omreport("$omopt_chassis batteries") };
# Text for the numeric battery reading (table only partly visible here)
3151 1 => 'Predictive Failure',
3153 4 => 'Presence Detected',
3157 foreach my $out (@output) {
# SNMP fields; the SNMP index is shifted down by one
3159 $index = $out->{batteryIndex} - 1;
3160 $status = $snmp_status{$out->{batteryStatus}};
3161 $reading = $bat_reading{$out->{batteryReading}};
3162 $location = $out->{batteryLocationName};
# omreport fields are used as delivered
3165 $index = $out->{'Index'};
3166 $status = $out->{'Status'};
3167 $reading = $out->{'Reading'};
3168 $location = $out->{'Probe Name'};
3171 next BATTERY if blacklisted('bp', $index);
# Every probe is reported with the level from %status2nagios
3174 my $msg = sprintf 'Battery probe %d [%s] is %s',
3175 $index, $location, $reading;
3176 report('chassis', $msg, $status2nagios{$status}, $index);
3182 #-----------------------------------------
3183 # CHASSIS: Check amperage probes (power monitoring)
3184 #-----------------------------------------
# Reports one line per amperage probe through report('chassis', ...) and,
# when $opt{perfdata} is defined, stores numeric readings with their
# thresholds in %perfdata. Without SNMP, a second pass over the raw
# omreport output adds further "pwr_mon_*" perfdata entries.
# NOTE(review): a good part of this routine (declarations, branch openers,
# %amp_discrete, most of the second pass) is missing from this listing.
3185 sub check_pwrmonitoring {
# Nothing to do when every amperage probe is blacklisted
3186 return if blacklisted('amp', 'all');
3190 my $reading = undef;
3191 my $location = undef;
3192 my $max_crit = undef;
3193 my $max_warn = undef;
# SNMP column OIDs mapped to the field names read in the loop below
3200 '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex',
3201 '1.3.6.1.4.1.674.10892.1.600.30.1.5.1' => 'amperageProbeStatus',
3202 '1.3.6.1.4.1.674.10892.1.600.30.1.6.1' => 'amperageProbeReading',
3203 '1.3.6.1.4.1.674.10892.1.600.30.1.7.1' => 'amperageProbeType',
3204 '1.3.6.1.4.1.674.10892.1.600.30.1.8.1' => 'amperageProbeLocationName',
3205 '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold',
3206 '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold',
3207 '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading',
3209 my $result = $snmp_session->get_entries(-columns => [keys %amp_oid]);
3211 # No pwrmonitoring is OK
3212 return 0 if !defined $result;
3214 @output = @{ get_snmp_output($result, \%amp_oid) };
# Alternative data source: output of run_omreport()
3217 @output = @{ run_omreport("$omopt_chassis pwrmonitoring") };
3220 my %amp_type # Amperage probe types
3222 1 => 'amperageProbeTypeIsOther', # other than following values
3223 2 => 'amperageProbeTypeIsUnknown', # unknown
3224 3 => 'amperageProbeTypeIs1Point5Volt', # 1.5 amperage probe
3225 4 => 'amperageProbeTypeIs3Point3volt', # 3.3 amperage probe
3226 5 => 'amperageProbeTypeIs5Volt', # 5 amperage probe
3227 6 => 'amperageProbeTypeIsMinus5Volt', # -5 amperage probe
3228 7 => 'amperageProbeTypeIs12Volt', # 12 amperage probe
3229 8 => 'amperageProbeTypeIsMinus12Volt', # -12 amperage probe
3230 9 => 'amperageProbeTypeIsIO', # I/O probe
3231 10 => 'amperageProbeTypeIsCore', # Core probe
3232 11 => 'amperageProbeTypeIsFLEA', # FLEA (standby) probe
3233 12 => 'amperageProbeTypeIsBattery', # Battery probe
3234 13 => 'amperageProbeTypeIsTerminator', # SCSI Termination probe
3235 14 => 'amperageProbeTypeIs2Point5Volt', # 2.5 amperage probe
3236 15 => 'amperageProbeTypeIsGTL', # GTL (ground termination logic) probe
3237 16 => 'amperageProbeTypeIsDiscrete', # amperage probe with discrete reading
3238 23 => 'amperageProbeTypeIsPowerSupplyAmps', # Power Supply probe with reading in Amps
3239 24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts
3240 25 => 'amperageProbeTypeIsSystemAmps', # System probe with reading in Amps
3241 26 => 'amperageProbeTypeIsSystemWatts', # System probe with reading in Watts
# Unit used for each probe type; types not listed here fall back to 'mA'
3252 'amperageProbeTypeIsPowerSupplyAmps' => 'hA', # tenths of Amps
3253 'amperageProbeTypeIsSystemAmps' => 'hA', # tenths of Amps
3254 'amperageProbeTypeIsPowerSupplyWatts' => 'W', # Watts
3255 'amperageProbeTypeIsSystemWatts' => 'W', # Watts
3256 'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit
3260 foreach my $out (@output) {
# SNMP fields; the SNMP index is shifted down by one. Discrete probes take
# their reading from %amp_discrete, all others use the numeric reading.
# Thresholds that are not present become 0.
3262 $index = $out->{amperageProbeIndex} - 1;
3263 $status = $snmp_status{$out->{amperageProbeStatus}};
3264 $reading = $amp_type{$out->{amperageProbeType}} eq 'amperageProbeTypeIsDiscrete'
3265 ? $amp_discrete{$out->{amperageProbeDiscreteReading}}
3266 : $out->{amperageProbeReading};
3267 $location = $out->{amperageProbeLocationName};
3268 $max_crit = exists $out->{amperageProbeUpperCriticalThreshold}
3269 ? $out->{amperageProbeUpperCriticalThreshold} : 0;
3270 $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold}
3271 ? $out->{amperageProbeUpperNonCriticalThreshold} : 0;
3272 $unit = exists $amp_unit{$amp_type{$out->{amperageProbeType}}}
3273 ? $amp_unit{$amp_type{$out->{amperageProbeType}}} : 'mA';
# Values in tenths of Amps get special treatment (body not visible here)
3274 if ($unit eq 'hA') {
# omreport fields; rows without a purely numeric index are skipped and
# '[N/A]' thresholds become 0
3282 $index = $out->{'Index'};
3283 next AMP if (!defined $index || $index !~ m/^\d+$/x);
3284 $status = $out->{'Status'};
3285 $reading = $out->{'Reading'};
3286 $location = $out->{'Probe Name'};
3287 $max_crit = $out->{'Failure Threshold'} ne '[N/A]'
3288 ? $out->{'Failure Threshold'} : 0;
3289 $max_warn = $out->{'Warning Threshold'} ne '[N/A]'
3290 ? $out->{'Warning Threshold'} : 0;
# Separate the number from its trailing unit; the unit of the reading is
# captured as $2
3291 $reading =~ s{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}{$1}xms;
3293 $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
3294 $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
3297 next AMP if blacklisted('amp', $index);
3298 next AMP if $index !~ m{\A \d+ \z}xms;
# NOTE(review): the format has four conversions but five arguments are
# passed; $status is never printed and newer perls warn "Redundant argument
# in sprintf" here, which the __WARN__ handler of this program collects
3301 my $msg = sprintf 'Amperage probe %d [%s] reads %s %s',
3302 $index, $location, $reading, $unit, $status;
3303 report('chassis', $msg, $status2nagios{$status}, $index);
3305 # Collect performance data
3306 if (defined $opt{perfdata}) {
3307 next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
# Perfdata key from the lower-cased location with whitespace turned into
# '_'; value format: <reading><unit>;<warn>;<crit>
3308 my $pname = lc $location;
3309 $pname =~ s{\s}{_}gxms;
3310 my $pkey = join q{_}, 'pwr_mon', $index, $pname;
3311 my $pval = join q{;}, "$reading$unit", $max_warn, $max_crit;
3312 $perfdata{$pkey} = $pval;
3316 # Collect EXTRA performance data not found at first run. This is a
3318 if (defined $opt{perfdata} && !$snmp) {
# Look at the "pwr_mon_<n>" keys already stored in %perfdata
3324 foreach (keys %perfdata) {
3325 if (m/\A pwr_mon_(\d+)/xms) {
# Second pass over the raw semicolon-separated omreport output
3331 foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
3333 if ($line eq 'Location;Reading') {
# Lines of the form "<name>;<decimal number> A" or "... W"
3341 if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) {
3345 $aname =~ s{\s}{_}gxms;
3347 # don't use an existing index
3348 while (exists $used{$index}) { ++$index; }
# Extra entries carry no thresholds, hence the fixed ";0;0"
3350 $perfdata{"pwr_mon_${index}_${aname}"} = "$aval$aunit;0;0";
3360 #-----------------------------------------
3361 # CHASSIS: Check intrusion
3362 #-----------------------------------------
# Reports one line per chassis intrusion sensor through
# report('chassis', ...), using either SNMP data or the output of
# run_omreport().
# NOTE(review): some lines of this routine (declarations, branch openers,
# one line of sprintf arguments) are missing from this listing.
3363 sub check_intrusion {
# Nothing to do when every intrusion sensor is blacklisted
3364 return if blacklisted('intr', 'all');
3368 my $reading = undef;
# SNMP column OIDs mapped to the field names read in the loop below
3374 '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
3375 '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
3376 '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
3378 my $result = $snmp_session->get_entries(-columns => [keys %int_oid]);
3380 # No intrusion is OK
3381 return 0 if !defined $result;
3383 @output = @{ get_snmp_output($result, \%int_oid) };
# Alternative data source: output of run_omreport()
3386 @output = @{ run_omreport("$omopt_chassis intrusion") };
# Text for the numeric intrusion reading
3391 1 => 'Not Breached', # chassis not breached and no uncleared breaches
3392 2 => 'Breached', # chassis currently breached
3393 3 => 'Breached Prior', # chassis breached prior to boot and has not been cleared
3394 4 => 'Breach Sensor Failure', # intrusion sensor has failed
3398 foreach my $out (@output) {
# SNMP fields; the SNMP index is shifted down by one
3400 $index = $out->{intrusionIndex} - 1;
3401 $status = $snmp_status{$out->{intrusionStatus}};
3402 $reading = $int_reading{$out->{intrusionReading}};
# omreport fields are used as delivered
3405 $index = $out->{'Index'};
3406 $status = $out->{'Status'};
3407 $reading = $out->{'State'};
3410 next INTRUSION if blacklisted('intr', $index);
# Any status other than 'Ok' is always reported as a WARNING, whatever the
# status value is
3413 if ($status ne 'Ok') {
3414 my $msg = sprintf 'Chassis intrusion %d detected: %s',
3416 report('chassis', $msg, $E_WARNING, $index);
# Healthy sensor: report status and reading with level OK
3420 my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)',
3421 $index, $status, $reading;
3422 report('chassis', $msg, $E_OK, $index);
3429 #-----------------------------------------
3430 # CHASSIS: Check alert log
3431 #-----------------------------------------
# Counts the alert log entries per severity in $count{alert} and reports a
# one-line summary through report('other', ...).
# NOTE(review): a few lines of this routine (among them the initialisation
# of $err) are missing from this listing.
3432 sub check_alertlog {
3433 return if $snmp; # Not supported with SNMP
3435 my @output = @{ run_omreport("$omopt_system alertlog") };
# Tally the entries by their "Severity" field
3436 foreach my $out (@output) {
3437 ++$count{alert}{$out->{Severity}};
3440 # Create error messages and set exit value if appropriate
# Critical entries take precedence over non-critical ones.
# NOTE(review): presumably the three counters are pre-set to 0 elsewhere;
# otherwise these comparisons and the %d conversions see undef - verify
3442 if ($count{alert}{'Critical'} > 0) { $err = $E_CRITICAL; }
3443 elsif ($count{alert}{'Non-Critical'} > 0) { $err = $E_WARNING; }
3445 my $msg = sprintf 'Alert log content: %d critical, %d non-critical, %d ok',
3446 $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'};
3447 report('other', $msg, $err);
#-----------------------------------------
# CHASSIS: Check ESM log overall health
#-----------------------------------------
# Reports the overall health of the ESM log through report('other', ...).
#
# The health is read via SNMP when $snmp is set, otherwise from the
# "Health;..." line of the semicolon-separated omreport esmlog output.
# If the SNMP request fails, a single UNKNOWN result is reported and no
# fill-grade message is produced, since nothing is known about the log.
sub check_esmlog_health {
    # Stays 'Ok' when omreport prints no "Health;" line
    my $health = 'Ok';

    if ($snmp) {
        my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
        if (!defined $result) {
            my $msg = sprintf 'SNMP ERROR [esmhealth]: %s',
              $snmp_session->error;
            report('other', $msg, $E_UNKNOWN);

            # Stop here: without a result there is no health value to
            # evaluate, and carrying on would add a second, bogus report
            return;
        }
        $health = $snmp_status{$result->{$systemStateEventLogStatus}};
    }
    else {
        foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
            if (m/\A Health;(.+) \z/xms) {
                $health = $1;

                # The capture may include the line terminator
                $health =~ s{\s+\z}{}xms;
                last;
            }
        }
    }

    # If the overall health of the ESM log is other than "Ok", the
    # fill grade of the log is more than 80% and the log should be
    # emptied.
    if ($health eq 'Ok') {
        my $msg = sprintf 'ESM log health is Ok (less than 80%% full)';
        report('other', $msg, $E_OK);
    }
    elsif ($health eq 'Critical') {
        my $msg = sprintf 'ESM log is 100%% full';
        report('other', $msg, $status2nagios{$health});
    }
    else {
        my $msg = sprintf 'ESM log is more than 80%% full';
        report('other', $msg, $status2nagios{$health});
    }

    return;
}
3497 #-----------------------------------------
3498 # CHASSIS: Check ESM log
3499 #-----------------------------------------
# ESM log content check: count ESM log entries per severity and file one
# summary line in the 'other' report category.
#
# NOTE(review): the 'sub' line, the opening of the %esm_oid map, the
# declarations of @output/$err and the SNMP/omreport branch statements are
# missing from this listing; the listing resumes inside the OID => name map.
3506 '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
3508 my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);
# NOTE(review): when the SNMP query yields nothing the sub returns without
# reporting, unlike check_esmlog_health which files an UNKNOWN -- presumably
# intentional, confirm.
3511 return if !defined $result;
# SNMP path: the numeric severity is translated through %snmp_status before
# it is used as the counter key.
3513 @output = @{ get_snmp_output($result, \%esm_oid) };
3514 foreach my $out (@output) {
3515 ++$count{esm}{$snmp_status{$out->{eventLogSeverityStatus}}};
# omreport path: the 'Severity' field is used as the counter key directly.
3519 @output = @{ run_omreport("$omopt_system esmlog") };
3520 foreach my $out (@output) {
3521 ++$count{esm}{$out->{Severity}};
3525 # Create error messages and set exit value if appropriate
# Critical entries win over non-critical ones when choosing the level.
3527 if ($count{esm}{'Critical'} > 0) { $err = $E_CRITICAL; }
3528 elsif ($count{esm}{'Non-Critical'} > 0) { $err = $E_WARNING; }
3530 my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok',
3531 $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'};
3532 report('other', $msg, $err);
3538 # Handy function for checking all storage components
# Runs the individual storage checks in the order listed below.
# NOTE(review): the 'sub' line and original lines 3545-3546 are missing from
# this listing, so further calls may sit between check_cache_battery() and
# check_enclosure_fans().
3541 check_controllers();
3542 check_physical_disks();
3543 check_virtual_disks();
3544 check_cache_battery();
3547 check_enclosure_fans();
3548 check_enclosure_pwr();
3549 check_enclosure_temp();
3550 check_enclosure_emms();
3556 #---------------------------------------------------------------------
3558 #---------------------------------------------------------------------
3561 # Fetch output from 'omreport chassis info', put in sysinfo hash
# get_omreport_chassis_info: run 'omreport <chassis> info -fmt ssv' and copy
# the model and service tag into $sysinfo{model} and $sysinfo{serial}.
# Takes no arguments.
#
# NOTE(review): the loop header over @lines, the close of the pipe and any
# handling of a failed open are not visible in this listing.
3563 sub get_omreport_chassis_info {
# The command is read through a pipe; it is passed as one string, not a list.
3564 if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") {
3565 my @lines = <$INFO>;
# Only lines that begin with one of the four accepted field names are parsed.
3568 next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;
# NOTE(review): the filter above does not require a ';', so $val can be undef
# for a matching line without a separator -- confirm omreport never emits one.
3569 my ($key, $val) = split /;/xms;
3570 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
3571 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
# Two spellings are accepted for each field name.
3572 if ($key eq 'Chassis Model' or $key eq 'Model') {
3573 $sysinfo{model} = $val;
3575 if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') {
3576 $sysinfo{serial} = $val;
3584 # Fetch output from 'omreport chassis bios', put in sysinfo hash
# get_omreport_chassis_bios: run 'omreport <chassis> bios -fmt ssv' and store
# the 'Version' and 'Release Date' fields as $sysinfo{bios} and
# $sysinfo{biosdate}. Takes no arguments.
#
# NOTE(review): the loop header, any line filter and the handling of a failed
# open are not visible in this listing.
3586 sub get_omreport_chassis_bios {
3587 if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") {
3588 my @lines = <$BIOS>;
# Each line is split into a key and a value on ';'.
3592 my ($key, $val) = split /;/xms;
3593 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
3594 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
3595 $sysinfo{bios} = $val if $key eq 'Version';
3596 $sysinfo{biosdate} = $val if $key eq 'Release Date';
3603 # Fetch output from 'omreport system operatingsystem', put in sysinfo hash
# get_omreport_system_operatingsystem: run
# 'omreport <system> operatingsystem -fmt ssv' and store the OS name and
# version as $sysinfo{osname} and $sysinfo{osver}. Takes no arguments.
#
# NOTE(review): reading of the pipe, the loop header and the handling of a
# failed open are not visible in this listing.
3605 sub get_omreport_system_operatingsystem {
3606 if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") {
# Each line is split into a key and a value on ';'.
3611 my ($key, $val) = split /;/xms;
3612 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
3613 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
3614 if ($key eq 'Operating System') {
3615 $sysinfo{osname} = $val;
3617 elsif ($key eq 'Operating System Version') {
3618 $sysinfo{osver} = $val;
3626 # Fetch output from 'omreport about', put in sysinfo hash
# get_omreport_about: run 'omreport about -fmt ssv' and look for the line
# "Version;<value>".
# NOTE(review): what is done with the captured value is not visible in this
# listing; the SNMP counterpart stores the version in $sysinfo{om}.
3628 sub get_omreport_about {
3629 if (open my $OM, '-|', "$omreport about -fmt ssv") {
# The whole line must consist of 'Version;' followed by at least one character.
3633 if (m/\A Version;(.+) \z/xms) {
3643 # Fetch chassis info via SNMP, put in sysinfo hash
# get_snmp_chassis_info: fetch the chassis information table via SNMP and
# copy the model name and service tag into $sysinfo{model} and
# $sysinfo{serial}. A failed query is reported as UNKNOWN in the 'other'
# category. Takes no arguments.
#
# NOTE(review): the opening of the %chassis_oid map and the else/closing
# lines are missing from this listing.
3645 sub get_snmp_chassis_info {
3648 '1.3.6.1.4.1.674.10892.1.300.10.1.9.1' => 'chassisModelName',
3649 '1.3.6.1.4.1.674.10892.1.300.10.1.11.1' => 'chassisServiceTagName',
# The whole table is walked; only the two OIDs listed above are picked out.
3652 my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
3653 my $result = $snmp_session->get_table(-baseoid => $chassisInformationTable);
3655 if (defined $result) {
3656 foreach my $oid (keys %{ $result }) {
3657 if (exists $chassis_oid{$oid} and $chassis_oid{$oid} eq 'chassisModelName') {
3658 $sysinfo{model} = $result->{$oid};
3659 $sysinfo{model} =~ s{\s+\z}{}xms; # remove trailing whitespace
3661 elsif (exists $chassis_oid{$oid} and $chassis_oid{$oid} eq 'chassisServiceTagName') {
3662 $sysinfo{serial} = $result->{$oid};
# Failure path: the session's error text goes into the report.
3667 my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
3668 $snmp_session->error;
3669 report('other', $msg, $E_UNKNOWN);
3675 # Fetch BIOS info via SNMP, put in sysinfo hash
# get_snmp_chassis_bios: fetch the system BIOS table via SNMP and store the
# BIOS version and release date as $sysinfo{bios} and $sysinfo{biosdate}.
# A failed query is reported as UNKNOWN in the 'other' category.
# Takes no arguments.
#
# NOTE(review): the opening of the %bios_oid map and the else/closing lines
# are missing from this listing.
3677 sub get_snmp_chassis_bios {
3680 '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
3681 '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
3684 my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
3685 my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);
3687 if (defined $result) {
3688 foreach my $oid (keys %{ $result }) {
3689 if (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSReleaseDateName') {
3690 $sysinfo{biosdate} = $result->{$oid};
# The leading 4+2+2 digits are rearranged as $2/$3/$1 and the rest of the
# value is dropped (presumably YYYYMMDD... -> MM/DD/YYYY). A value that does
# not start with eight digits is left unchanged.
3691 $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
3693 elsif (exists $bios_oid{$oid} and $bios_oid{$oid} eq 'systemBIOSVersionName') {
3694 $sysinfo{bios} = $result->{$oid};
# Failure path: the session's error text goes into the report.
3699 my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
3700 $snmp_session->error;
3701 report('other', $msg, $E_UNKNOWN);
3707 # Fetch OS info via SNMP, put in sysinfo hash
# get_snmp_system_operatingsystem: fetch the operating system table via SNMP
# and store the OS name and version as $sysinfo{osname} and $sysinfo{osver}.
# A failed query is reported as UNKNOWN in the 'other' category.
# Takes no arguments.
#
# NOTE(review): the opening of the %os_oid map and the else/closing lines
# are missing from this listing.
3709 sub get_snmp_system_operatingsystem {
3712 '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
3713 '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
3716 my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
3717 my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);
3719 if (defined $result) {
3720 foreach my $oid (keys %{ $result }) {
3721 if (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemName') {
3722 $sysinfo{osname} = ($result->{$oid});
3724 elsif (exists $os_oid{$oid} and $os_oid{$oid} eq 'operatingSystemOperatingSystemVersionName') {
3725 $sysinfo{osver} = $result->{$oid};
# Failure path: the session's error text goes into the report.
3730 my $msg = sprintf 'SNMP ERROR getting OS info: %s',
3731 $snmp_session->error;
3732 report('other', $msg, $E_UNKNOWN);
3738 # Fetch OMSA version via SNMP, put in sysinfo hash
# get_snmp_about: fetch the systems management software group via SNMP and
# store the global version name as $sysinfo{om}. A failed query is reported
# as UNKNOWN in the 'other' category. Takes no arguments.
#
# NOTE(review): the opening of the %omsa_oid map and the else/closing lines
# are missing from this listing.
3740 sub get_snmp_about {
3743 '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName',
3745 my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
3746 my $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);
3747 if (defined $result) {
3748 foreach my $oid (keys %{ $result }) {
3749 if (exists $omsa_oid{$oid} and $omsa_oid{$oid} eq 'systemManagementSoftwareGlobalVersionName') {
3750 $sysinfo{om} = ($result->{$oid});
# Failure path: the session's error text goes into the report.
3755 my $msg = sprintf 'SNMP ERROR getting OMSA info: %s',
3756 $snmp_session->error;
3757 report('other', $msg, $E_UNKNOWN);
3763 # Collects some information about the system
# Body of the system-information collector (its 'sub' line is not part of
# this listing). Every step picks the SNMP or the omreport variant of a
# getter depending on $snmp, and the optional steps run only when their
# output will actually be used.
3767 # Get system model and serial number
3768 $snmp ? get_snmp_chassis_info() : get_omreport_chassis_info();
3770 # Get BIOS information. Only if needed
# '%b' and '%d' are the post-message placeholders that are later replaced
# with $sysinfo{bios} and $sysinfo{biosdate}.
# NOTE(review): original line 3772 (part of this condition) is missing here.
3771 if ( $opt{okinfo} >= 1
3773 or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) ) {
3774 $snmp ? get_snmp_chassis_bios() : get_omreport_chassis_bios();
3777 # Get OMSA information. Only if needed
3778 if ($opt{okinfo} >= 3 or $opt{debug}) {
3779 $snmp ? get_snmp_about() : get_omreport_about();
3782 # Return now if debug
# In debug mode the OS lookup below is therefore never performed.
3783 return if $opt{debug};
3785 # Get OS information. Only if needed
# '%o' and '%r' are the post-message placeholders that are later replaced
# with $sysinfo{osname} and $sysinfo{osver}.
3786 if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
3787 $snmp ? get_snmp_system_operatingsystem() : get_omreport_system_operatingsystem();
3794 # Helper function for running omreport when the results are strictly
# run_omreport_info: run 'omreport <command> -fmt ssv' and collect its
# "key;value" lines into %output.
#   $command - the omreport sub-command string, taken from the first argument.
# Callers dereference the return value as a hash reference.
#
# NOTE(review): the declaration of %output and the return statement are
# missing from this listing.
3796 sub run_omreport_info {
3797 my $command = shift;
3801 # Run omreport and fetch output
# stderr is merged into stdout ('2>&1'), so omreport's error text is parsed
# together with the regular output.
3802 my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
3804 # Parse output, store in hash
3805 for ((split /\n/xms, $rawtext)) {
# A line that starts with "Error" is reported as UNKNOWN.
3806 if (m/\A Error/xms) {
3807 my $msg = "Problem running 'omreport $command': $_";
3808 report('other', $msg, $E_UNKNOWN);
3810 next if !m/;/xms; # ignore lines with less than two fields
# Only the first two ';'-separated fields are kept; a later line with the
# same key overwrites an earlier one.
3811 my @vals = split m/;/xms;
3812 $output{$vals[0]} = $vals[1];
3815 # Finally, return the collected information
3819 # Get various firmware information (BMC, RAC)
# get_firmware_info: collect BMC and remote access controller firmware
# versions into %sysinfo (keys bmc, bmc_fw, rac, rac_name, rac_fw).
# The data comes from the SNMP firmware table or from the key/value output of
# 'omreport <chassis> info'. Takes no arguments.
#
# NOTE(review): the statements that choose between the SNMP and omreport
# paths, the openings of the %fw_oid and %fw_type lists and the closing
# braces are missing from this listing.
3820 sub get_firmware_info {
3821 my @snmp_output = ();
3822 my %nrpe_output = ();
3827 '1.3.6.1.4.1.674.10892.1.300.60.1.7.1' => 'firmwareType',
3828 '1.3.6.1.4.1.674.10892.1.300.60.1.8.1' => 'firmwareTypeName',
3829 '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
3832 my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
3833 my $result = $snmp_session->get_table(-baseoid => $firmwareTable);
3835 # Some don't have this OID, this is ok
3836 if (!defined $result) {
3840 @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
3843 %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
# Numeric firmwareType code => symbolic name.
3846 my %fw_type # Firmware types
3848 1 => 'other', # other than following values
3849 2 => 'unknown', # unknown
3850 3 => 'systemBIOS', # System BIOS
3851 4 => 'embeddedSystemManagementController', # Embedded System Management Controller
3852 5 => 'powerSupplyParallelingBoard', # Power Supply Paralleling Board
3853 6 => 'systemBackPlane', # System (Primary) Backplane
3854 7 => 'powerVault2XXSKernel', # PowerVault 2XXS Kernel
3855 8 => 'powerVault2XXSApplication', # PowerVault 2XXS Application
3856 9 => 'frontPanel', # Front Panel Controller
3857 10 => 'baseboardManagementController', # Baseboard Management Controller
3858 11 => 'hotPlugPCI', # Hot Plug PCI Controller
3859 12 => 'sensorData', # Sensor Data Records
3860 13 => 'peripheralBay', # Peripheral Bay Backplane
3861 14 => 'secondaryBackPlane', # Secondary Backplane for ESM 2 systems
3862 15 => 'secondaryBackPlaneESM3And4', # Secondary Backplane for ESM 3 and 4 systems
3863 16 => 'rac', # Remote Access Controller
3864 17 => 'imc' # Integrated Management Controller
# SNMP rows: pick out the BMC and the remote access controller entries.
# NOTE(review): a firmwareType code outside 1..17 makes $fw_type{...} undef,
# so the 'eq' and '=~' tests below would raise "uninitialized" warnings.
3869 foreach my $out (@snmp_output) {
3870 if ($fw_type{$out->{firmwareType}} eq 'baseboardManagementController') {
3871 $sysinfo{'bmc'} = 1;
3872 $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
# NOTE(review): '|' binds looser than the anchors, so this pattern means
# "starts with rac" OR "ends with imc", not "is exactly rac or imc".
# The intended form is presumably m{\A (?:rac|imc) \z}xms. With the names in
# %fw_type above the outcome is the same, since only 'rac' and 'imc' match.
3874 elsif ($fw_type{$out->{firmwareType}} =~ m{\A rac|imc \z}xms) {
# All whitespace is removed from the controller's type name.
3875 my $name = $out->{firmwareTypeName}; $name =~ s/\s//gxms;
3876 $sysinfo{'rac'} = 1;
3877 $sysinfo{'rac_name'} = $name;
3878 $sysinfo{'rac_fw'} = $out->{firmwareVersionName};
# omreport key/value pairs: entries without a value are skipped.
3883 foreach my $key (keys %nrpe_output) {
3884 next if !defined $nrpe_output{$key};
3885 if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
3886 $sysinfo{'bmc'} = 1;
3887 $sysinfo{'bmc_fw'} = $nrpe_output{$key};
# Matches keys such as "DRAC5 Version" or "iDRAC 6 Version"; the two captures
# are the controller family and an optional digit.
# NOTE(review): the line that builds $name from the captures is missing from
# this listing.
3889 elsif ($key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
3891 $sysinfo{'rac'} = 1;
3892 $sysinfo{'rac_fw'} = $nrpe_output{$key};
3893 $sysinfo{'rac_name'} = $name;
3903 #=====================================================================
3905 #=====================================================================
3907 # Here we do the actual checking of components
3908 # Check global status if applicable
# NOTE(review): the condition guarding this call (original line 3909) is
# missing from this listing. $globalstatus is compared with the exit code
# once all alerts have been counted.
3910 $globalstatus = check_global();
3913 # Do multiple selected checks
# Each check runs only when its flag in %check is true; the checks run in the
# order listed.
3914 if ($check{storage}) { check_storage(); }
3915 if ($check{memory}) { check_memory(); }
3916 if ($check{fans}) { check_fans(); }
3917 if ($check{power}) { check_powersupplies(); }
3918 if ($check{temp}) { check_temperatures(); }
3919 if ($check{cpu}) { check_processors(); }
3920 if ($check{voltage}) { check_volts(); }
3921 if ($check{batteries}) { check_batteries(); }
3922 if ($check{amperage}) { check_pwrmonitoring(); }
3923 if ($check{intrusion}) { check_intrusion(); }
3924 if ($check{alertlog}) { check_alertlog(); }
3925 if ($check{esmlog}) { check_esmlog(); }
3926 if ($check{esmhealth}) { check_esmlog_health(); }
3929 #---------------------------------------------------------------------
3931 #---------------------------------------------------------------------
# NOTE(review): the statement that belongs to the next comment (original
# lines 3943-3944) is missing from this listing.
3942 # Get system information
3945 # Get firmware info if requested via option
3946 if ($opt{okinfo} >= 1) {
3947 get_firmware_info();
3950 # Close SNMP session
# NOTE(review): the guard around the close (original line 3951) is not
# visible; presumably it only runs in SNMP mode -- confirm.
3952 $snmp_session->close;
# Debug output: a system header followed by one table per report category.
# NOTE(review): the statement that opens this section (presumably a test on
# $opt{debug}) and the braces/else between the tables and the alert loop are
# missing from this listing.
3957 print " System: $sysinfo{model}\n";
3958 print " ServiceTag: $sysinfo{serial}";
# Pad the left column to 25 characters; a negative repeat count yields ''.
3959 print q{ } x (25 - length $sysinfo{serial}), "OMSA version: $sysinfo{om}\n";
3960 print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}";
3961 print q{ } x (25 - length "$sysinfo{bios} $sysinfo{biosdate}"), "Plugin version: $VERSION\n";
# Storage table: state right-aligned to 8 characters, nexus (ID) to 8.
3962 if ($#report_storage >= 0) {
3963 print "-----------------------------------------------------------------------------\n";
3964 print " Storage Components \n";
3965 print "=============================================================================\n";
3966 print " STATE | ID | MESSAGE TEXT \n";
3967 print "---------+----------+--------------------------------------------------------\n";
3968 foreach (@report_storage) {
3969 my ($msg, $level, $nexus) = @{$_};
3970 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
3971 . q{ } x (8 - length $nexus) . "$nexus | $msg\n";
# Every listed message is counted by state name, OK messages included.
3972 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Chassis table: same layout, nexus (ID) right-aligned to 4 characters.
3975 if ($#report_chassis >= 0) {
3976 print "-----------------------------------------------------------------------------\n";
3977 print " Chassis Components \n";
3978 print "=============================================================================\n";
3979 print " STATE | ID | MESSAGE TEXT \n";
3980 print "---------+------+------------------------------------------------------------\n";
3981 foreach (@report_chassis) {
3982 my ($msg, $level, $nexus) = @{$_};
3983 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
3984 . q{ } x (4 - length $nexus) . "$nexus | $msg\n";
3985 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Other messages: no ID column; $nexus is unpacked but not used.
3988 if ($#report_other >= 0) {
3989 print "-----------------------------------------------------------------------------\n";
3990 print " Other messages \n";
3991 print "=============================================================================\n";
3992 print " STATE | MESSAGE TEXT \n";
3993 print "---------+-------------------------------------------------------------------\n";
3994 foreach (@report_other) {
3995 my ($msg, $level, $nexus) = @{$_};
3996 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | $msg\n";
3997 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Regular (non-debug) output: one line per non-OK message.
4002 my $c = 0; # counter to determine linebreaks
4004 # Run through each message, sorted by severity level
# NOTE(review): the comparator returns only true/false ('<'), not the
# negative/zero/positive value sort expects, so the resulting order is not
# well defined. For highest level first the usual form is
# { $b->[1] <=> $a->[1] }.
4006 foreach (sort {$a->[1] < $b->[1]} (@report_storage, @report_chassis, @report_other)) {
4007 my ($msg, $level, $nexus) = @{ $_ };
# OK messages are neither printed nor counted here.
4008 next ALERT if $level == $E_OK;
4010 if (defined $opt{only}) {
4011 # If user wants only critical alerts
4012 next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);
4014 # If user wants only warning alerts
4015 next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
4018 # Prefix with service tag if specified with option '-i|--info'
# With htmlinfo set, the service tag becomes a link to warranty_url().
4020 if (defined $opt{htmlinfo}) {
4021 $msg = '[<a href="' . warranty_url($sysinfo{serial})
4022 . "\">$sysinfo{serial}</a>] " . $msg;
4025 $msg = "[$sysinfo{serial}] " . $msg;
4029 # Prefix with nagios level if specified with option '--state'
4030 $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};
4032 # Prefix with one-letter nagios level if specified with option '--short-state'
4033 $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};
# The first message is printed as is; later ones are preceded by $linebreak.
4035 ($c++ == 0) ? print $msg : print $linebreak, $msg;
# Messages skipped by the 'next ALERT' lines above are not counted here.
4037 $nagios_alert_count{$reverse_exitcode{$level}}++;
4041 # Determine our exit code
# Later assignments override earlier ones, so the precedence is
# CRITICAL over WARNING over UNKNOWN.
# NOTE(review): the initial assignment of $exit_code (original line 4042) is
# missing from this listing.
4043 $exit_code = $E_UNKNOWN if $nagios_alert_count{'UNKNOWN'} > 0;
4044 $exit_code = $E_WARNING if $nagios_alert_count{'WARNING'} > 0;
4045 $exit_code = $E_CRITICAL if $nagios_alert_count{'CRITICAL'} > 0;
4047 # Global status via SNMP.. extra safety check
# Fires only when no alert was counted, the global status is not OK and the
# user did not restrict the output with the 'only' option.
# NOTE(review): whatever follows the message (original lines 4053-4055) is
# missing from this listing.
4048 if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
4049 print "OOPS! Something is wrong with this server, but I don't know what. ";
4050 print "The global system health status is $reverse_exitcode{$globalstatus}, ";
4051 print "but every component check is OK. This may be a bug in the Nagios plugin, ";
4052 print "please file a bug report.\n";
# OK output. Two variants: a short per-component message when the 'only'
# option names a single check, otherwise the general "hardware working fine"
# line with optional extra detail depending on $opt{okinfo}.
#
# NOTE(review): '|' binds looser than the anchors, so the pattern below means
# "starts with critical" OR "contains warning" OR "ends with chassis"; the
# intended form is presumably m{\A (?:critical|warning|chassis) \z}xms.
# NOTE(review): the declaration of the message table (original line 4058) is
# missing from this listing; the table is used below as %okmsg.
4057 if ($exit_code == $E_OK && defined $opt{only} && $opt{only} !~ m{\A critical|warning|chassis \z}xms && !$opt{debug}) {
4059 = ( 'storage' => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
4060 'fans' => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked",
4061 'temp' => "TEMPERATURES OK - $count{temp} temperature probes checked",
4062 'memory' => "MEMORY OK - $count{dimm} memory modules checked",
4063 'power' => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked",
4064 'cpu' => "PROCESSORS OK - $count{cpu} processors checked",
4065 'voltage' => "VOLTAGE OK - $count{volt} voltage probes checked",
4066 'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked",
4067 'amperage' => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
4068 'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked",
4069 'alertlog' => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
4070 'esmlog' => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
4071 'esmhealth' => "ESM LOG OK - less than 80% used",
# All table values are built before one is picked, so every counter used
# above is interpolated whichever check was selected.
4074 print $okmsg{$opt{only}};
# General OK line; with htmlinfo set, model and service tag become links.
4076 elsif ($exit_code == $E_OK && !$opt{debug}) {
4077 if (defined $opt{htmlinfo}) {
4078 printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
4079 documentation_url($sysinfo{model}), $sysinfo{model},
4080 warranty_url($sysinfo{serial}), $sysinfo{serial};
4083 printf q{OK - System: '%s', SN: '%s', hardware working fine},
4084 $sysinfo{model}, $sysinfo{serial};
4087 if ($check{storage}) {
4088 printf ', %d logical drives, %d physical drives',
4089 $count{vdisk}, $count{pdisk};
4092 print ', not checking storage';
# okinfo >= 1: BIOS plus, when found, remote access controller and BMC firmware.
4095 if ($opt{okinfo} >= 1) {
4097 printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};
4099 if ($sysinfo{rac}) {
4100 printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
4102 if ($sysinfo{bmc}) {
4103 printf q{, BMC='%s'}, $sysinfo{bmc_fw};
# okinfo >= 2: one line per storage controller and per enclosure, each
# printed after a $linebreak.
4107 if ($opt{okinfo} >= 2) {
4108 if ($check{storage}) {
4109 my @storageprint = ();
4110 foreach my $id (sort keys %{ $sysinfo{controller} }) {
4111 chomp $sysinfo{controller}{$id}{driver};
4112 my $msg = sprintf q{----- Ctrl %s [%s]: Fw='%s', Dr='%s'},
4113 $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
4114 $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
4115 if (defined $sysinfo{controller}{$id}{storport}) {
4116 $msg .= sprintf q{, Storport: '%s'}, $sysinfo{controller}{$id}{storport};
4118 push @storageprint, $msg;
4120 foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
4121 push @storageprint, sprintf q{----- Encl %s [%s]: Fw='%s'},
4122 $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
4123 $sysinfo{enclosure}{$id}->{firmware};
4127 foreach my $line (@storageprint) {
4128 print $linebreak, $line;
# okinfo >= 3: append the OMSA version.
4133 if ($opt{okinfo} >= 3) {
4134 print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
# Extended info: an extra line with model and service tag. With htmlinfo set
# the two values become links built by documentation_url() and warranty_url().
# NOTE(review): original lines 4140 and 4145-4146 are missing from this
# listing.
4139 if ($opt{extinfo}) {
4141 if (defined $opt{htmlinfo}) {
4142 printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
4143 documentation_url($sysinfo{model}), $sysinfo{model},
4144 warranty_url($sysinfo{serial}), $sysinfo{serial};
4147 printf '------ SYSTEM: %s, SN: %s',
4148 $sysinfo{model}, $sysinfo{serial};
# Post message: $opt{postmsg} is either the name of a file holding the text
# or the text itself. Placeholders in the text are then expanded.
# NOTE(review): the declaration of $post, the reading of the file and the
# final print are missing from this listing.
4151 if (defined $opt{postmsg}) {
# An existing regular file is opened; failure prints an error and exits with
# UNKNOWN.
4153 if (-f $opt{postmsg}) {
4154 open my $POST, '<', $opt{postmsg}
4155 or ( print $linebreak
4156 and print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n"
4157 and exit $E_UNKNOWN );
# Otherwise the option value is used as the message text.
4163 $post = $opt{postmsg};
4165 if (defined $post) {
# Placeholders: %s serial, %m model, %b BIOS version, %d BIOS date,
# %o OS name, %r OS version, %p physical disks, %l logical disks,
# %n line break, %% literal percent sign.
# NOTE(review): the substitutions run one after another and '%%' is handled
# last, so '%%s' becomes '%' plus the serial instead of a literal '%s', and a
# substituted value that itself contains a placeholder is expanded again.
# A single pass over s{[%](.)}{...} would avoid both.
4167 $post =~ s{[%]s}{$sysinfo{serial}}gxms;
4168 $post =~ s{[%]m}{$sysinfo{model}}gxms;
4169 $post =~ s{[%]b}{$sysinfo{bios}}gxms;
4170 $post =~ s{[%]d}{$sysinfo{biosdate}}gxms;
4171 $post =~ s{[%]o}{$sysinfo{osname}}gxms;
4172 $post =~ s{[%]r}{$sysinfo{osver}}gxms;
4173 $post =~ s{[%]p}{$count{pdisk}}gxms;
4174 $post =~ s{[%]l}{$count{vdisk}}gxms;
4175 $post =~ s{[%]n}{$linebreak}gxms;
4176 $post =~ s{[%]{2}}{%}gxms;
4182 # Print any perl warnings that have occurred
# @perl_warnings is filled by the __WARN__ handler installed at startup; each
# element is an array reference holding the arguments of one warning.
# Any collected warning forces the exit code to UNKNOWN.
4183 if (@perl_warnings) {
4184 foreach (@perl_warnings) {
4186 print "${linebreak}INTERNAL ERROR: @$_";
4188 $exit_code = $E_UNKNOWN;
4191 # Reset the WARN signal
4192 $SIG{__WARN__} = $original_sigwarn;
4194 # Print performance data
# Printed only when requested, not in debug mode, and when %perfdata holds
# at least one entry.
4195 if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
4196 my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ }; # line break for perfdata
# Sort comparator: order by the %order rank of the label's part before the
# first '_', then by the full label.
# NOTE(review): the declaration of %order and of the comparator sub are
# missing from this listing. 'cmp' compares the ranks as strings; if the
# ranks are numbers, '<=>' would be the safer operator -- confirm.
4207 return ($order{(split /_/, $a, 2)[0]} cmp $order{(split /_/, $b, 2)[0]}) || $a cmp $b;
# Output format: 'label'=value, joined with the separator chosen above.
4210 print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
4213 # Print a linebreak at the end
4214 print "\n" if !$opt{debug};
4216 # Exit with proper exit code