5 # Monitor Dell server hardware status using Dell OpenManage Server
6 # Administrator, either locally via NRPE, or remotely via SNMP.
10 # Copyright (C) 2010 Trond H. Amundsen
12 # This program is free software: you can redistribute it and/or modify
13 # it under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # This program is distributed in the hope that it will be useful, but
18 # WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 require 5.006; # Perl v5.6.0 or newer is required
29 use POSIX qw(isatty ceil);
30 use Getopt::Long qw(:config no_ignore_case);
32 # Global (package) variables used throughout the code
33 use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
34 $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
35 $snmp_session $snmp_error $omreport $globalstatus $global
36 $linebreak $omopt_chassis $omopt_system $blade
38 %check %opt %reverse_exitcode %status2nagios
39 %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
40 %blacklist %nagios_alert_count %count %snmp_enclosure %snmp_controller
41 @perl_warnings @controllers @enclosures @perfdata
42 @report_storage @report_chassis @report_other
45 #---------------------------------------------------------------------
46 # Initialization and global variables
47 #---------------------------------------------------------------------
49 # Collect perl warnings in an array
50 $SIG{__WARN__} = sub { push @perl_warnings, [@_]; };
52 # Version and similar info
53 $NAME = 'check_openmanage';
54 $VERSION = '3.6.0-beta1';
55 $AUTHOR = 'Trond H. Amundsen';
56 $CONTACT = 't.h.amundsen@usit.uio.no';
64 # Firmware update lock file [FIXME: location on Windows?]
65 $FW_LOCK = '/var/lock/.spsetup'; # default on Linux
68 $USAGE = <<"END_USAGE";
69 Usage: $NAME [OPTION]...
77 -p, --perfdata Output performance data
78 -t, --timeout Plugin timeout in seconds
79 -c, --critical Customise temperature critical limits
80 -w, --warning Customise temperature warning limits
81 -d, --debug Debug output, reports everything
82 -h, --help Display this help text
83 -V, --version Display version info
87 -H, --hostname Hostname or IP (required for SNMP)
88 -C, --community SNMP community string
89 -P, --protocol SNMP protocol version
90 --port SNMP port number
94 -i, --info Prefix any alerts with the service tag
95 -e, --extinfo Append system info to alerts
96 -s, --state Prefix alerts with alert state
97 -S, --short-state Prefix alerts with alert state (abbreviated)
98 -o, --okinfo Verbosity when check result is OK
99 -I, --htmlinfo HTML output with clickable links
101 CHECK CONTROL AND BLACKLISTING:
103 -a, --all Check everything, even log content
104 -b, --blacklist Blacklist missing and/or failed components
105 --only Only check a certain component or alert type
106 --check Fine-tune which components are checked
108 For more information and advanced options, see the manual page or URL:
109 http://folk.uio.no/trondham/software/check_openmanage.html
112 # Version and license text
113 $LICENSE = <<"END_LICENSE";
115 Copyright (C) 2010 $AUTHOR
116 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
117 This is free software: you are free to change and redistribute it.
118 There is NO WARRANTY, to the extent permitted by law.
120 Written by $AUTHOR <$CONTACT>
123 # Options with default values
124 %opt = ( 'blacklist' => [], # blacklisting
125 'check' => [], # check control
126 'critical' => [], # temperature critical limits
127 'warning' => [], # temperature warning limits
128 'timeout' => 30, # default timeout is 30 seconds
129 'debug' => 0, # debugging / verbose output
130 'help' => 0, # display help output
131 'perfdata' => undef, # output performance data
132 'info' => 0, # display servicetag
133 'extinfo' => 0, # display extra info
134 'htmlinfo' => undef, # html tags in output
135 'postmsg' => undef, # post message
136 'state' => 0, # display alert type
137 'shortstate' => 0, # display alert type (short); same key that the -S|--short-state option is stored under
138 'okinfo' => 0, # default "ok" output level
139 'linebreak' => undef, # specify linebreak
140 'version' => 0, # plugin version info
141 'all' => 0, # check everything
142 'only' => undef, # only one component
143 'omreport' => undef, # omreport path
144 'port' => 161, # default SNMP port
145 'hostname' => undef, # hostname or IP
146 'community' => 'public', # SNMP v1 or v2c
147 'protocol' => 2, # default SNMP protocol 2c
148 'username' => undef, # SNMP v3
149 'authpassword' => undef, # SNMP v3
150 'authkey' => undef, # SNMP v3
151 'authprotocol' => undef, # SNMP v3
152 'privpassword' => undef, # SNMP v3
153 'privkey' => undef, # SNMP v3
154 'privprotocol' => undef, # SNMP v3
155 'use_get_table' => 0, # hack for SNMPv3 on Windows with net-snmp
159 GetOptions('b|blacklist=s' => \@{ $opt{blacklist} },
160 'check=s' => \@{ $opt{check} },
161 'c|critical=s' => \@{ $opt{critical} },
162 'w|warning=s' => \@{ $opt{warning} },
163 't|timeout=i' => \$opt{timeout},
164 'd|debug' => \$opt{debug},
165 'h|help' => \$opt{help},
166 'V|version' => \$opt{version},
167 'p|perfdata:s' => \$opt{perfdata},
168 'i|info' => \$opt{info},
169 'e|extinfo' => \$opt{extinfo},
170 'I|htmlinfo:s' => \$opt{htmlinfo},
171 'postmsg=s' => \$opt{postmsg},
172 's|state' => \$opt{state},
173 'S|short-state' => \$opt{shortstate},
174 'o|ok-info|okinfo=i' => \$opt{okinfo}, # "--okinfo" is the spelling shown in the usage text; "--ok-info" kept for compatibility
175 'linebreak=s' => \$opt{linebreak},
176 'a|all' => \$opt{all},
177 'only=s' => \$opt{only},
178 'omreport=s' => \$opt{omreport},
179 'port=i' => \$opt{port},
180 'H|hostname=s' => \$opt{hostname},
181 'C|community=s' => \$opt{community},
182 'P|protocol=i' => \$opt{protocol},
183 'U|username=s' => \$opt{username},
184 'authpassword=s' => \$opt{authpassword},
185 'authkey=s' => \$opt{authkey},
186 'authprotocol=s' => \$opt{authprotocol},
187 'privpassword=s' => \$opt{privpassword},
188 'privkey=s' => \$opt{privkey},
189 'privprotocol=s' => \$opt{privprotocol},
190 'use-get_table' => \$opt{use_get_table},
191 ) or do { print $USAGE; exit $E_UNKNOWN };
193 # If user requested help
199 # If user requested version info
207 print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
212 # If we're using SNMP
213 $snmp = defined $opt{hostname} ? 1 : 0;
215 # SNMP session variables
216 $snmp_session = undef;
219 # The omreport command
222 # Check flags, override available with the --check option
223 %check = ( 'storage' => 1, # check storage subsystem
224 'memory' => 1, # check memory (dimms)
225 'fans' => 1, # check fan status
226 'power' => 1, # check power supplies
227 'temp' => 1, # check temperature
228 'cpu' => 1, # check processors
229 'voltage' => 1, # check voltage
230 'batteries' => 1, # check battery probes
231 'amperage' => 1, # check power consumption
232 'intrusion' => 1, # check intrusion detection
233 'alertlog' => 0, # check the alert log
234 'esmlog' => 0, # check the ESM log (hardware log)
235 'esmhealth' => 1, # check the ESM log overall health
239 $linebreak = isatty(*STDOUT) ? "\n" : '<br/>';
241 # Line break from option
242 if (defined $opt{linebreak}) {
243 if ($opt{linebreak} eq 'REG') {
246 elsif ($opt{linebreak} eq 'HTML') {
247 $linebreak = '<br/>';
250 $linebreak = $opt{linebreak};
254 # Exit with status=UNKNOWN if there is firmware upgrade in progress
255 if (!$snmp && -f $FW_LOCK) {
256 print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
260 # List of controllers and enclosures
261 @controllers = (); # controllers
262 @enclosures = (); # enclosures
263 %snmp_enclosure = (); # enclosures
266 @report_storage = (); # messages with associated nagios level (storage)
267 @report_chassis = (); # messages with associated nagios level (chassis)
268 @report_other = (); # messages with associated nagios level (other)
270 # Counters for everything
273 'pdisk' => 0, # number of physical disks
274 'vdisk' => 0, # number of logical drives (virtual disks)
275 'temp' => 0, # number of temperature probes
276 'volt' => 0, # number of voltage probes
277 'amp' => 0, # number of amperage probes
278 'intr' => 0, # number of intrusion probes
279 'dimm' => 0, # number of memory modules
280 'mem' => 0, # total memory
281 'fan' => 0, # number of fan probes
282 'cpu' => 0, # number of CPUs
283 'bat' => 0, # number of batteries
284 'power' => 0, # number of power supplies
286 'Critical' => 0, # critical entries in ESM log
287 'Non-Critical' => 0, # warning entries in ESM log
288 'Ok' => 0, # ok entries in ESM log
291 'Critical' => 0, # critical entries in alert log
292 'Non-Critical' => 0, # warning entries in alert log
293 'Ok' => 0, # ok entries in alert log
300 # Global health status
301 $global = 1; # default is to check global status
302 $globalstatus = $E_OK; # default global health status is "OK"
304 # Nagios error levels reversed
308 $E_WARNING => 'WARNING',
309 $E_CRITICAL => 'CRITICAL',
310 $E_UNKNOWN => 'UNKNOWN',
313 # OpenManage (omreport) and SNMP error levels
316 'Unknown' => $E_CRITICAL,
317 'Critical' => $E_CRITICAL,
318 'Non-Critical' => $E_WARNING,
320 'Non-Recoverable' => $E_CRITICAL,
321 'Other' => $E_CRITICAL,
332 6 => 'Non-Recoverable',
335 # Probe Status via SNMP
338 1 => 'Other', # probe status is not one of the following:
339 2 => 'Unknown', # probe status is unknown (not known or monitored)
340 3 => 'Ok', # probe is reporting a value within the thresholds
341 4 => 'nonCriticalUpper', # probe has crossed upper noncritical threshold
342 5 => 'criticalUpper', # probe has crossed upper critical threshold
343 6 => 'nonRecoverableUpper', # probe has crossed upper non-recoverable threshold
344 7 => 'nonCriticalLower', # probe has crossed lower noncritical threshold
345 8 => 'criticalLower', # probe has crossed lower critical threshold
346 9 => 'nonRecoverableLower', # probe has crossed lower non-recoverable threshold
347 10 => 'failed', # probe is not functional
350 # Probe status translated to Nagios alarm levels
353 'Other' => $E_CRITICAL,
354 'Unknown' => $E_CRITICAL,
356 'nonCriticalUpper' => $E_WARNING,
357 'criticalUpper' => $E_CRITICAL,
358 'nonRecoverableUpper' => $E_CRITICAL,
359 'nonCriticalLower' => $E_WARNING,
360 'criticalLower' => $E_CRITICAL,
361 'nonRecoverableLower' => $E_CRITICAL,
362 'failed' => $E_CRITICAL,
365 # System information gathered
368 'bios' => 'N/A', # BIOS version
369 'biosdate' => 'N/A', # BIOS release date
370 'serial' => 'N/A', # serial number (service tag)
371 'model' => 'N/A', # system model
372 'rev' => q{}, # system revision
373 'osname' => 'N/A', # OS name
374 'osver' => 'N/A', # OS version
375 'om' => 'N/A', # OMSA version
376 'bmc' => 0, # HAS baseboard management controller (BMC)
377 'rac' => 0, # HAS remote access controller (RAC)
378 'rac_name' => 'N/A', # remote access controller (RAC)
379 'bmc_fw' => 'N/A', # BMC firmware
380 'rac_fw' => 'N/A', # RAC firmware
383 # Adjust which checks to perform
384 adjust_checks() if defined $opt{check};
386 # Blacklisted components
387 %blacklist = defined $opt{blacklist} ? %{ get_blacklist() } : ();
389 # If blacklisting is in effect, don't check global health status
390 if (scalar keys %blacklist > 0) {
394 # Take into account new hardware and blades
395 $omopt_chassis = 'chassis'; # default "chassis" option to omreport
396 $omopt_system = 'system'; # default "system" option to omreport
397 $blade = 0; # if this is a blade system
399 # Some initializations and checking before we begin
401 snmp_initialize(); # initialize SNMP
402 snmp_check(); # check that SNMP works
403 snmp_detect_blade(); # detect blade via SNMP
406 # Find the omreport binary
408 # Check help output from omreport, see which options are available.
409 # Also detecting blade via omreport.
410 check_omreport_options();
414 #---------------------------------------------------------------------
416 #---------------------------------------------------------------------
419 # Store a message in one of the message arrays
422 my ($type, $msg, $exval, $id) = @_;
423 defined $id or $id = q{};
427 'storage' => \@report_storage,
428 'chassis' => \@report_chassis,
429 'other' => \@report_other,
432 return push @{ $type2array{$type} }, [ $msg, $exval, $id ];
437 # Run command, put resulting output lines in an array and return a
438 # reference to that array
443 open my $CMD, '-|', $command
444 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN)
448 or do { report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN)
449 and return \@lines };
454 # Run command, put resulting output in a string variable and return it
459 open my $CMD, '-|', $command
460 or do { report('other', "Couldn't run command '$command': $!", $E_UNKNOWN) and return };
461 my $rawtext = do { local $/ = undef; <$CMD> }; # slurping
464 # NOTE: We don't check the return value of close() since omreport
465 # does something weird sometimes.
473 sub snmp_initialize {
474 # Legal SNMP v3 protocols
475 my $snmp_v3_privprotocol = qr{\A (?: des|aes|aes128|3des|3desde|3desede ) \z}xms; # alternatives grouped so \A and \z anchor every one; "3desede" stays accepted as before (NOTE(review): believed to be Net::SNMP's long name for 3des - confirm)
476 my $snmp_v3_authprotocol = qr{\A (?: md5|sha ) \z}xms; # grouped for the same reason: the whole value must be one of the names
478 # Parameters to Net::SNMP->session()
481 '-port' => $opt{port},
482 '-hostname' => $opt{hostname},
483 '-version' => $opt{protocol},
486 # Parameters for SNMP v3
487 if ($opt{protocol} == 3) {
489 # Username is mandatory
490 if (defined $opt{username}) {
491 $param{'-username'} = $opt{username};
494 print "SNMP ERROR: With SNMPv3 the username must be specified\n";
498 # Authpassword is optional
499 if (defined $opt{authpassword}) {
500 $param{'-authpassword'} = $opt{authpassword};
503 # Authkey is optional
504 if (defined $opt{authkey}) {
505 $param{'-authkey'} = $opt{authkey};
508 # Privpassword is optional
509 if (defined $opt{privpassword}) {
510 $param{'-privpassword'} = $opt{privpassword};
513 # Privkey is optional
514 if (defined $opt{privkey}) {
515 $param{'-privkey'} = $opt{privkey};
518 # Privprotocol is optional
519 if (defined $opt{privprotocol}) {
520 if ($opt{privprotocol} =~ m/$snmp_v3_privprotocol/xms) {
521 $param{'-privprotocol'} = $opt{privprotocol};
524 print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
525 . "must be one of [des|aes|aes128|3des|3desde]\n";
530 # Authprotocol is optional
531 if (defined $opt{authprotocol}) {
532 if ($opt{authprotocol} =~ m/$snmp_v3_authprotocol/xms) {
533 $param{'-authprotocol'} = $opt{authprotocol};
536 print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
537 . "must be one of [md5|sha]\n";
542 # Parameters for SNMP v2c or v1
543 elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
544 $param{'-community'} = $opt{community};
547 print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
551 # Try to initialize the SNMP session
552 if ( eval { require Net::SNMP; 1 } ) {
553 ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
554 if (!defined $snmp_session) {
555 printf "SNMP: %s\n", $snmp_error;
560 print "ERROR: You need perl module Net::SNMP to run $NAME in SNMP mode\n";
567 # Checking if SNMP works by probing for "chassisModelName", which all
568 # servers should have
571 my $chassisModelName = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';
572 my $result = $snmp_session->get_request(-varbindlist => [$chassisModelName]);
574 # Typically if remote host isn't responding
575 if (!defined $result) {
576 printf "SNMP CRITICAL: %s\n", $snmp_session->error;
580 # If OpenManage isn't installed or is not working
581 if ($result->{$chassisModelName} =~ m{\A noSuch (Instance|Object) \z}xms) {
582 print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
589 # Detecting blade via SNMP
591 sub snmp_detect_blade {
592 my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
593 my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);
595 # Identify blade. Older models (4th and 5th gen models) and/or old
596 # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
597 # similar, we assume that this isn't a blade
598 if (exists $result->{$DellBaseBoardType} && $result->{$DellBaseBoardType} eq '3') {
605 # Locate the omreport binary
608 # If user has specified path to omreport
609 if (defined $opt{omreport} and -x $opt{omreport}) {
610 $omreport = qq{"$opt{omreport}"};
614 # Possible full paths for omreport
617 '/opt/dell/srvadmin/bin/omreport', # default on Linux with OMSA >= 6.2.0
618 '/usr/bin/omreport', # default on Linux with OMSA < 6.2.0
619 '/opt/dell/srvadmin/oma/bin/omreport.sh', # alternate on Linux
620 '/opt/dell/srvadmin/oma/bin/omreport', # alternate on Linux
621 'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x64
622 'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe', # default on Windows x32
623 'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x32
624 'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe', # 8bit legacy default on Windows x64
627 # Find the one to use
629 foreach my $bin (@omreport_paths) {
631 $omreport = qq{"$bin"};
636 # Exit with status=UNKNOWN if OM is not installed, or we don't
637 # have permission to execute the binary
638 if (!defined $omreport) {
639 print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
646 # Checks output from 'omreport -?' and searches for arguments to
647 # omreport, to accommodate deprecated options "chassis" and "system"
648 # (on newer hardware), as well as blade servers.
650 sub check_omreport_options {
651 foreach (@{ run_command("$omreport -? 2>&1") }) {
652 if (m/\A servermodule /xms) {
653 # If "servermodule" argument to omreport exists, use it
654 # instead of argument "system"
655 $omopt_system = 'servermodule';
657 elsif (m/\A mainsystem /xms) {
658 # If "mainsystem" argument to omreport exists, use it
659 # instead of argument "chassis"
660 $omopt_chassis = 'mainsystem';
662 elsif (m/\A modularenclosure /xms) {
663 # If "modularenclosure" argument to omreport exists, assume
664 # that this is a blade
672 # Read the blacklist option and return a hash containing the
673 # blacklisted components
679 if (scalar @{ $opt{blacklist} } >= 0) {
680 foreach my $black (@{ $opt{blacklist} }) {
683 open my $BL, '<', $black
684 or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN)
696 return {} if $#bl < 0;
698 # Parse blacklist string, put in hash
699 foreach my $black (@bl) {
700 my @comps = split m{/}xms, $black;
701 foreach my $c (@comps) {
702 next if $c !~ m/=/xms;
703 my ($key, $val) = split /=/xms, $c;
704 my @vals = split /,/xms, $val;
705 $blacklist{$key} = \@vals;
713 # Read the check option and adjust the hash %check, which is a rough
714 # list of components to be checked
719 # Adjust checking based on the '--all' option
722 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical|warning ) \z}xms) { # grouped: the whole value must be "critical" or "warning"
723 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
726 if (scalar @{ $opt{check} } > 0) {
727 print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
731 # set the check hash to check everything
732 map { $_ = 1 } values %check;
737 # Adjust checking based on the '--only' option
738 if (defined $opt{only} and $opt{only} !~ m{\A (?: critical|warning ) \z}xms) { # same exact-match test as for '--all' above
740 if (scalar @{ $opt{check} } > 0) {
741 print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
744 if (! exists $check{$opt{only}} && $opt{only} ne 'chassis') {
745 print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
749 # reset the check hash
750 map { $_ = 0 } values %check;
752 # adjust the check hash
753 if ($opt{only} eq 'chassis') {
754 map { $check{$_} = 1 } qw(memory fans power temp cpu voltage
755 batteries amperage intrusion esmhealth);
758 $check{$opt{only}} = 1;
764 # Adjust checking based on the '--check' option
765 if (scalar @{ $opt{check} } >= 0) {
766 foreach my $check (@{ $opt{check} }) {
769 open my $CL, '<', $check
770 or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN) and return };
783 # Parse checklist string, put in hash
784 foreach my $check (@cl) {
785 my @checks = split /,/xms, $check;
786 foreach my $c (@checks) {
787 next if $c !~ m/=/xms;
788 my ($key, $val) = split /=/xms, $c;
793 # Check if we should check global health status
795 foreach (keys %check) {
796 next CHECK_KEY if $_ eq 'esmlog'; # not part of global status
797 next CHECK_KEY if $_ eq 'alertlog'; # not part of global status
799 if ($check{$_} == 0) { # found something with checking turned off
809 # Runs omreport and returns an array of anonymous hashes containing
811 # Takes one argument: string containing parameters to omreport
818 # Errors that are OK. Some low-end poweredge (and blades) models
819 # don't have RAID controllers, intrusion detection sensor, or
820 # redundant/instrumented power supplies etc.
823 Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem # No intrusion probe
824 | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
825 # | No\scontrollers\sfound # No RAID controller
826 | No\sbattery\sprobes\sfound\son\sthis\ssystem # No battery probes
827 | Invalid\scommand:\spwrmonitoring # Older OMSAs lack this command(?)
828 # | Current\sprobes\snot\sfound # OMSA + RHEL5.4 bug
831 # Errors that are OK on blade servers
834 No\sfan\sprobes\sfound\son\sthis\ssystem # No fan probes
837 # Run omreport and fetch output
838 my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
839 return [] if !defined $rawtext;
841 # Workaround for Openmanage BUG introduced in OMSA 5.5.0
842 $rawtext =~ s{\n;}{;}gxms if $command eq 'storage controller';
844 # Report if no controllers found
845 if ($command eq 'storage controller' and $rawtext =~ m{No\scontrollers\sfound}xms) {
846 report('other', 'No storage controllers found (check OMSA installation)', $E_UNKNOWN);
849 # Openmanage sometimes puts a linebreak between "Error" and the
851 $rawtext =~ s{^Error\s*\n}{Error: }xms;
853 # Parse output, store in array
854 for ((split m{\n}xms, $rawtext)) {
856 next if m{$ok_errors}xms;
857 next if ($blade and m{$ok_blade_errors}xms);
858 report('other', "Problem running 'omreport $command': $_", $E_UNKNOWN);
861 next if !m/(.*?;){2}/xms; # ignore lines with less than 3 fields
862 my @vals = split /;/xms;
863 if ($vals[0] =~ m/\A (Index|ID|Severity|Processor|Current\sSpeed) \z/xms) {
868 push @output, { map { $_ => $vals[$i++] } @keys };
873 # Finally, return the collected information
879 # Checks if a component is blacklisted. Returns 1 if the component is
880 # blacklisted, 0 otherwise. Takes two arguments:
881 # arg1: component name
882 # arg2: component id or index
885 my $name = shift; # component name
886 my $id = shift; # component id
887 my $ret = 0; # return value
889 if (defined $blacklist{$name}) {
890 foreach my $comp (@{ $blacklist{$name} }) {
891 if (defined $id and ($comp eq $id or uc($comp) eq 'ALL')) {
900 # Converts the NexusID from SNMP to our version
903 $nexus =~ s{\A \\}{}xms;
904 $nexus =~ s{\\}{:}gxms;
908 # Sets custom temperature thresholds based on user supplied options
909 sub custom_temperature_thresholds {
910 my $type = shift; # type of threshold, either w (warning) or c (critical)
911 my %thres = (); # will contain the thresholds
912 my @limits = (); # holds the input
914 my @opt = $type eq 'w' ? @{ $opt{warning} } : @{ $opt{critical} };
916 if (scalar @opt >= 0) {
917 foreach my $t (@opt) {
921 or do { report('other', "Couldn't open temperature threshold file $t: $!",
922 $E_UNKNOWN) and return {} };
933 # Parse checklist string, put in hash
934 foreach my $th (@limits) {
935 my @tmp = split m{,}xms, $th;
936 foreach my $t (@tmp) {
937 next if $t !~ m{=}xms;
938 my ($key, $val) = split m{=}xms, $t;
939 if ($val =~ m{/}xms) {
940 my ($max, $min) = split m{/}xms, $val;
941 $thres{$key}{max} = $max;
942 $thres{$key}{min} = $min;
945 $thres{$key}{max} = $val;
954 # Gets the output from SNMP result according to the OIDs checked
955 sub get_snmp_output {
956 my ($result,$oidref) = @_;
960 foreach my $oid (keys %{ $result }) {
962 $short =~ s{\s}{}gxms; # remove whitespace
963 $short =~ s{\A (.+) \. (\d+) \z}{$1}xms; # remove last number
965 if (exists $oidref->{$short}) {
966 $temp[$id]{$oidref->{$short}} = $result->{$oid};
970 # Remove any empty indexes
971 foreach my $out (@temp) {
981 # Map the controller or other item in-place
983 my ($key, $val, $list) = @_;
985 foreach my $lst (@{ $list }) {
986 if (!exists $lst->{$key}) {
993 # Return the URL for official Dell documentation for a specific
995 sub documentation_url {
998 # create model short form, e.g. "r710"
999 $model =~ s{\A PowerEdge \s (.+?) \z}{lc($1)}exms;
1001 # special case for blades (e.g. M600, M710), they have common
1003 $model =~ s{\A m\d+ \z}{m}xms;
1005 return 'http://support.dell.com/support/edocs/systems/pe' . $model . '/';
1008 # Return the URL for warranty information for a server with a given
1009 # serial number (servicetag)
1013 # Dell support sites for different parts of the world
1016 'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
1017 'ap' => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
1018 'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
1021 # warranty URLs for different country codes
1025 'at' => $supportsite{emea} . 'de/details?c=at&l=de&ServiceTag=', # Austria
1026 'be' => $supportsite{emea} . 'nl/details?c=be&l=nl&ServiceTag=', # Belgium
1027 'cz' => $supportsite{emea} . 'cs/details?c=cz&l=cs&ServiceTag=', # Czech Republic
1028 'de' => $supportsite{emea} . 'de/details?c=de&l=de&ServiceTag=', # Germany
1029 'dk' => $supportsite{emea} . 'da/details?c=dk&l=da&ServiceTag=', # Denmark
1030 'es' => $supportsite{emea} . 'es/details?c=es&l=es&ServiceTag=', # Spain
1031 'fi' => $supportsite{emea} . 'fi/details?c=fi&l=fi&ServiceTag=', # Finland
1032 'fr' => $supportsite{emea} . 'fr/details?c=fr&l=fr&ServiceTag=', # France
1033 'gr' => $supportsite{emea} . 'en/details?c=gr&l=el&ServiceTag=', # Greece
1034 'it' => $supportsite{emea} . 'it/details?c=it&l=it&ServiceTag=', # Italy
1035 'il' => $supportsite{emea} . 'en/details?c=il&l=en&ServiceTag=', # Israel
1036 'me' => $supportsite{emea} . 'en/details?c=me&l=en&ServiceTag=', # Middle East
1037 'no' => $supportsite{emea} . 'no/details?c=no&l=no&ServiceTag=', # Norway
1038 'nl' => $supportsite{emea} . 'nl/details?c=nl&l=nl&ServiceTag=', # The Netherlands
1039 'pl' => $supportsite{emea} . 'pl/details?c=pl&l=pl&ServiceTag=', # Poland
1040 'pt' => $supportsite{emea} . 'en/details?c=pt&l=pt&ServiceTag=', # Portugal
1041 'ru' => $supportsite{emea} . 'ru/details?c=ru&l=ru&ServiceTag=', # Russia
1042 'se' => $supportsite{emea} . 'sv/details?c=se&l=sv&ServiceTag=', # Sweden
1043 'uk' => $supportsite{emea} . 'en/details?c=uk&l=en&ServiceTag=', # United Kingdom
1044 'za' => $supportsite{emea} . 'en/details?c=za&l=en&ServiceTag=', # South Africa
1046 'br' => $supportsite{glob} . 'c=br&l=pt&ServiceTag=', # Brazil
1047 'ca' => $supportsite{glob} . 'c=ca&l=en&ServiceTag=', # Canada
1048 'mx' => $supportsite{glob} . 'c=mx&l=es&ServiceTag=', # Mexico
1049 'us' => $supportsite{glob} . 'c=us&l=en&ServiceTag=', # USA
1051 'au' => $supportsite{ap} . 'c=au&l=en&ServiceTag=', # Australia
1052 'cn' => $supportsite{ap} . 'c=cn&l=zh&ServiceTag=', # China
1053 'in' => $supportsite{ap} . 'c=in&l=en&ServiceTag=', # India
1055 'XX' => $supportsite{glob} . 'ServiceTag=', # default
1058 if (exists $url{$opt{htmlinfo}}) {
1059 return $url{$opt{htmlinfo}} . $tag;
1062 return $url{XX} . $tag;
1067 # This helper function returns the corresponding value of a hash key,
1068 # but takes into account that the key may not exist
1070 my $key = shift || return undef;
1072 return exists $hash->{$key} ? $hash->{$key} : "Undefined value $key";
1077 #---------------------------------------------------------------------
1079 #---------------------------------------------------------------------
1081 #-----------------------------------------
1082 # Check global health status
1083 #-----------------------------------------
1089 # Checks global status, i.e. both storage and chassis
1091 my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
1092 my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
1093 if (!defined $result) {
1094 printf "SNMP ERROR [global]: %s\n", $snmp_session->error; # a failed request is reported by the session object; $snmp_error is only set when the session is created
1097 $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
1101 # NB! This does not check storage, only chassis...
1103 foreach (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
1105 next if m/\A SEVERITY;COMPONENT/xms;
1106 if (m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
1107 $health = $status2nagios{$1};
1118 #-----------------------------------------
1119 # STORAGE: Check controllers
1120 #-----------------------------------------
1121 sub check_controllers {
1122 return if blacklisted('ctrl', 'all');
1131 my $firmware = undef;
1133 my $minstdr = undef; # Minimum required Storport driver version
1134 my $stdr = undef; # Storport driver version
1140 '1.3.6.1.4.1.674.10893.1.20.130.1.1.1' => 'controllerNumber',
1141 '1.3.6.1.4.1.674.10893.1.20.130.1.1.2' => 'controllerName',
1142 '1.3.6.1.4.1.674.10893.1.20.130.1.1.5' => 'controllerState',
1143 '1.3.6.1.4.1.674.10893.1.20.130.1.1.8' => 'controllerFWVersion',
1144 '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
1145 '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
1146 '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
1147 '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
1148 '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
1149 '1.3.6.1.4.1.674.10893.1.20.130.1.1.55' => 'controllerStorportDriverVersion',
1150 '1.3.6.1.4.1.674.10893.1.20.130.1.1.56' => 'controllerMinRequiredStorportVer',
1153 # We use get_table() here for the odd case where a server has
1154 # two or more controllers, and where some OIDs are missing on
1155 # one of the controllers.
1156 my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
1157 my $result = $snmp_session->get_table(-baseoid => $controllerTable);
1159 if (!defined $result) {
1160 printf "SNMP ERROR [storage / controller]: %s.\n", $snmp_session->error;
1161 $snmp_session->close;
1165 # # No controllers is OK
1166 # return if !defined $result;
1168 @output = @{ get_snmp_output($result, \%ctrl_oid) };
1171 @output = @{ run_omreport('storage controller') };
1185 foreach my $out (@output) {
1187 $name = $out->{controllerName};
1188 $state = get_hashval($out->{controllerState}, \%ctrl_state);
1189 $status = $snmp_status{$out->{controllerComponentStatus}};
1190 $minfw = exists $out->{controllerMinFWVersion}
1191 ? $out->{controllerMinFWVersion} : undef;
1192 $mindr = exists $out->{controllerMinDriverVersion}
1193 ? $out->{controllerMinDriverVersion} : undef;
1194 $firmware = exists $out->{controllerFWVersion}
1195 ? $out->{controllerFWVersion} : 'N/A';
1196 $driver = exists $out->{controllerDriverVersion}
1197 ? $out->{controllerDriverVersion} : 'N/A';
1198 $minstdr = exists $out->{'controllerMinRequiredStorportVer'}
1199 ? $out->{controllerMinRequiredStorportVer} : undef;
1200 $stdr = exists $out->{controllerStorportDriverVersion}
1201 ? $out->{controllerStorportDriverVersion} : undef;
1202 $nexus = convert_nexus($out->{controllerNexusID});
1207 $name = $out->{Name};
1208 $state = $out->{State};
1209 $status = $out->{Status};
1210 $minfw = $out->{'Minimum Required Firmware Version'} ne 'Not Applicable'
1211 ? $out->{'Minimum Required Firmware Version'} : undef;
1212 $mindr = $out->{'Minimum Required Driver Version'} ne 'Not Applicable'
1213 ? $out->{'Minimum Required Driver Version'} : undef;
1214 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
1215 ? $out->{'Firmware Version'} : 'N/A';
1216 $driver = $out->{'Driver Version'} ne 'Not Applicable'
1217 ? $out->{'Driver Version'} : 'N/A';
1218 $minstdr = (exists $out->{'Minimum Required Storport Driver Version'}
1219 and $out->{'Minimum Required Storport Driver Version'} ne 'Not Applicable')
1220 ? $out->{'Minimum Required Storport Driver Version'} : undef;
1221 $stdr = (exists $out->{'Storport Driver Version'}
1222 and $out->{'Storport Driver Version'} ne 'Not Applicable')
1223 ? $out->{'Storport Driver Version'} : undef;
1227 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
1228 push @controllers, $id;
1230 # Collecting some storage info
1231 $sysinfo{'controller'}{$id}{'id'} = $nexus;
1232 $sysinfo{'controller'}{$id}{'name'} = $name;
1233 $sysinfo{'controller'}{$id}{'driver'} = $driver;
1234 $sysinfo{'controller'}{$id}{'firmware'} = $firmware;
1235 $sysinfo{'controller'}{$id}{'storport'} = $stdr;
1237 # Store controller info for future use (SNMP)
1239 $snmp_controller{$out->{controllerNumber}} = $nexus;
1242 next CTRL if blacklisted('ctrl', $nexus);
1244 # Special case: old firmware
1245 if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
1247 my $msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date},
1248 $id, $name, $firmware;
1249 report('storage', $msg, $E_WARNING, $nexus);
1251 # Special case: old driver
1252 if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
1254 my $msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date},
1255 $id, $name, $driver;
1256 report('storage', $msg, $E_WARNING, $nexus);
1258 # Special case: old storport driver
1259 if (!blacklisted('ctrl_stdr', $id) && defined $minstdr) {
1261 my $msg = sprintf q{Controller %d [%s]: Storport driver '%s' is out of date},
1263 report('storage', $msg, $E_WARNING, $nexus);
1266 if ($status eq 'Ok' or ($status eq 'Non-Critical'
1267 and (defined $minfw or defined $mindr or defined $minstdr))) {
1268 my $msg = sprintf 'Controller %d [%s] is %s',
1270 report('storage', $msg, $E_OK, $nexus);
1274 my $msg = sprintf 'Controller %d [%s] needs attention: %s',
1276 report('storage', $msg, $status2nagios{$status}, $nexus);
1283 #-----------------------------------------
1284 # STORAGE: Check physical drives
1285 #-----------------------------------------
# Reports the state of every physical disk via report('storage', ...).
# Disk records come either from SNMP (the array disk table plus the
# enclosure/channel connection controller-number columns) or from
# "omreport storage pdisk controller=N" for each entry in @controllers.
# Returns early when @controllers is empty or when 'pdisk' is
# blacklisted for 'all'.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
1286 sub check_physical_disks {
1287 return if $#controllers == -1;
1288 return if blacklisted('pdisk', 'all');
1298 my $vendor = undef; # disk vendor
1299 my $product = undef; # product ID
1300 my $capacity = undef; # disk length (size) in bytes
1301 my $media = undef; # media type (e.g. HDD, SSD)
1302 my $bus = undef; # bus protocol (e.g. SAS, SATA)
1303 my $spare = undef; # spare state (e.g. global hotspare)
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %pdisk_oid).
1309 '1.3.6.1.4.1.674.10893.1.20.130.4.1.1' => 'arrayDiskNumber',
1310 '1.3.6.1.4.1.674.10893.1.20.130.4.1.2' => 'arrayDiskName',
1311 '1.3.6.1.4.1.674.10893.1.20.130.4.1.3' => 'arrayDiskVendor',
1312 '1.3.6.1.4.1.674.10893.1.20.130.4.1.4' => 'arrayDiskState',
1313 '1.3.6.1.4.1.674.10893.1.20.130.4.1.6' => 'arrayDiskProductID',
1314 '1.3.6.1.4.1.674.10893.1.20.130.4.1.9' => 'arrayDiskEnclosureID',
1315 '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
1316 '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
1317 '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
1318 '1.3.6.1.4.1.674.10893.1.20.130.4.1.21' => 'arrayDiskBusType',
1319 '1.3.6.1.4.1.674.10893.1.20.130.4.1.22' => 'arrayDiskSpareState',
1320 '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
1321 '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
1322 '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
1323 '1.3.6.1.4.1.674.10893.1.20.130.4.1.35' => 'arrayDiskMediaType',
1324 '1.3.6.1.4.1.674.10893.1.20.130.5.1.7' => 'arrayDiskEnclosureConnectionControllerNumber',
1325 '1.3.6.1.4.1.674.10893.1.20.130.6.1.7' => 'arrayDiskChannelConnectionControllerNumber',
# With $opt{use_get_table} set, fetch the whole array disk table with
# get_table() and copy the two connection columns into the same result.
1328 if ($opt{use_get_table}) {
1329 my $arrayDiskTable = '1.3.6.1.4.1.674.10893.1.20.130.4';
1330 my $arrayDiskEnclosureConnectionControllerNumber = '1.3.6.1.4.1.674.10893.1.20.130.5.1.7';
1331 my $arrayDiskChannelConnectionControllerNumber = '1.3.6.1.4.1.674.10893.1.20.130.6.1.7';
1333 $result = $snmp_session->get_table(-baseoid => $arrayDiskTable);
1334 my $ext1 = $snmp_session->get_table(-baseoid => $arrayDiskEnclosureConnectionControllerNumber);
1335 my $ext2 = $snmp_session->get_table(-baseoid => $arrayDiskChannelConnectionControllerNumber);
1337 if (defined $result) {
1338 defined $ext1 && map { $$result{$_} = $$ext1{$_} } keys %{ $ext1 };
1339 defined $ext2 && map { $$result{$_} = $$ext2{$_} } keys %{ $ext2 };
# Column-wise fetch of only the OIDs that are keys of %pdisk_oid.
1343 $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);
# An undefined result is handled as an SNMP error: print the session's
# error text and close the session.
1346 if (!defined $result) {
1347 printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
1348 $snmp_session->close;
1352 @output = @{ get_snmp_output($result, \%pdisk_oid) };
# omreport path: one query per controller; map_item() is called with
# 'ctrl' and the controller id so each record can be read back as
# $out->{ctrl} below.
1355 foreach my $c (@controllers) {
1356 # This blacklists disks with broken firmware, which includes
1357 # illegal XML characters that make openmanage choke on itself
1358 next if blacklisted('ctrl_pdisk', $c);
1360 push @output, @{ run_omreport("storage pdisk controller=$c") };
1361 map_item('ctrl', $c, \@output);
# Numeric code => text tables (partial in this listing). The spare
# codes below are presumably the entries of %spare_state.
1367 1 => 'VD member', # disk is a member of a virtual disk
1368 2 => 'DG member', # disk is a member of a disk group
1369 3 => 'Global HS', # disk is a global hot spare
1370 4 => 'Dedicated HS', # disk is a dedicated hot spare
1371 5 => 'no', # not a spare
1372 99 => 'n/a', # not applicable
1386 3 => 'Fibre Channel',
1404 22 => 'Replacing', # FIXME: this one is not defined in the OMSA MIBs
1408 28 => 'Diagnostics',
1409 34 => 'Predictive failure',
1410 35 => 'Initializing',
1413 41 => 'Unsupported',
1414 53 => 'Incompatible',
1417 # Check physical disks on each of the controllers
1419 foreach my $out (@output) {
# --- values taken from an SNMP record ---
1421 $name = $out->{arrayDiskName};
# The disk id is channel:enclosure:target when an enclosure id exists,
# otherwise channel:target.
1422 if (exists $out->{arrayDiskEnclosureID}) {
1423 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskEnclosureID},
1424 $out->{arrayDiskTargetID});
1427 $id = join q{:}, ($out->{arrayDiskChannel}, $out->{arrayDiskTargetID});
1429 $state = get_hashval($out->{arrayDiskState}, \%pdisk_state);
1430 $status = $snmp_status{$out->{arrayDiskComponentStatus}};
# A SMART alert indication value of 2 sets the failure-predicted flag.
1431 $fpred = exists $out->{arrayDiskSmartAlertIndication}
1432 && $out->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
1434 $nexus = convert_nexus($out->{arrayDiskNexusID});
1435 $vendor = $out->{arrayDiskVendor};
1436 $product = $out->{arrayDiskProductID};
1437 $spare = get_hashval($out->{arrayDiskSpareState}, \%spare_state);
1438 $bus = exists $out->{arrayDiskBusType}
1439 ? get_hashval($out->{arrayDiskBusType}, \%bus_type) : undef;
1440 $media = exists $out->{arrayDiskMediaType}
1441 ? get_hashval($out->{arrayDiskMediaType}, \%media_type) : undef;
# arrayDiskLengthInMB is multiplied by 1024**2 to get a byte count.
1442 $capacity = $out->{arrayDiskLengthInMB} * 1024**2;
1444 # try to find the controller where the disk belongs
1445 if (exists $out->{arrayDiskEnclosureConnectionControllerNumber}) {
1446 # for disks that are attached to an enclosure
1447 $ctrl = $snmp_controller{$out->{arrayDiskEnclosureConnectionControllerNumber}};
1449 elsif (exists $out->{arrayDiskChannelConnectionControllerNumber}) {
1450 # for disks that are not attached to an enclosure
1451 $ctrl = $snmp_controller{$out->{arrayDiskChannelConnectionControllerNumber}};
1454 # last resort... use the nexus id (old/broken hardware)
1456 $ctrl =~ s{\A (\d+) : .* \z}{$1}xms;
# --- values taken from an omreport record ---
1461 $name = $out->{'Name'};
1462 $state = $out->{'State'};
1463 $status = $out->{'Status'};
1464 $fpred = lc($out->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
1465 $progr = ' [' . $out->{'Progress'} . ']';
1466 $ctrl = $out->{'ctrl'};
1467 $nexus = join q{:}, $out->{ctrl}, $id;
1468 $vendor = $out->{'Vendor ID'};
1469 $product = $out->{'Product ID'};
1470 $media = $out->{'Media'};
1471 $spare = $out->{'Hot Spare'};
1472 $bus = $out->{'Bus Protocol'};
1473 $capacity = $out->{'Capacity'};
# Keep only the digits from a trailing "(N bytes)" in the capacity text.
1474 $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
1477 next PDISK if blacklisted('pdisk', $nexus);
1480 $vendor =~ s{\s+\z}{}xms; # remove trailing whitespace
1481 $product =~ s{\s+\z}{}xms; # remove trailing whitespace
# Normalise the spare state to 'Global HS' / 'Dedicated HS'; values that
# do not match the pattern in the last branch become undef.
# NOTE(review): in m{\A Global|Dedicated}xms the \A anchor applies only
# to 'Global'; 'Dedicated' may match anywhere in the string. Confirm
# whether \A (?:Global|Dedicated) was intended.
1484 if ($spare eq 'Global') { $spare = 'Global HS'; }
1485 elsif ($spare eq 'Dedicated') { $spare = 'Dedicated HS'; }
1486 elsif ($spare !~ m{\A Global|Dedicated}xms) { $spare = undef; }
1488 # Calculate human readable capacity
# Decimal (1000-based) units: TB with one decimal once the rounded-up
# GB figure reaches 1000, otherwise whole GB.
1489 $capacity = ceil($capacity / 1000**3) >= 1000
1490 ? sprintf '%.1fTB', ($capacity / 1000**4)
1491 : sprintf '%.0fGB', ($capacity / 1000**3);
1492 $capacity = '450GB' if $capacity eq '449GB'; # quick fix for 450GB disks
1493 $capacity = '300GB' if $capacity eq '299GB'; # quick fix for 300GB disks
1494 $capacity = '146GB' if $capacity eq '147GB'; # quick fix for 146GB disks
1495 $capacity = '100GB' if $capacity eq '99GB'; # quick fix for 100GB disks
1497 # Capitalize only the first letter of the vendor name
1498 $vendor = (substr $vendor, 0, 1) . lc (substr $vendor, 1, length $vendor);
1500 # Remove unnecessary trademark rubbish from vendor name
1501 $vendor =~ s{\(tm\)\z}{}xms;
1503 # bus and media aren't always defined
1505 if (defined $bus && defined $media) { $busmedia = "$bus-$media "; }
1506 elsif (defined $bus && ! defined $media) { $busmedia = "$bus "; }
1507 elsif (! defined $bus && defined $media) { $busmedia = "$media "; }
1509 # Special case: Failure predicted
1511 my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
1512 $name, $vendor, $product, $capacity, $ctrl;
1513 $msg .= " ($state)" if $state ne 'Predictive failure';
# Critical only when the disk status itself maps to critical; any other
# status is reported as a warning.
1514 report('storage', $msg,
1515 ($status2nagios{$status} == $E_CRITICAL ? $E_CRITICAL : $E_WARNING), $nexus);
1517 # Special case: Rebuilding / Replacing
# NOTE(review): alternation binds loosest here, so the pattern reads as
# "starts with Rebuilding" OR "ends with Replacing". Confirm whether
# \A (?:Rebuilding|Replacing) \z was intended.
1518 elsif ($state =~ m{\A Rebuilding|Replacing \z}xms) {
1519 my $msg = sprintf '%s [%s %s, %s] on ctrl %d is %s%s',
1520 $name, $vendor, $product, $capacity, $ctrl, $state, $progr;
1521 report('storage', $msg, $E_WARNING, $nexus);
# Any other status that is not 'Ok' is reported with the Nagios level
# that %status2nagios gives for it.
1524 elsif ($status ne 'Ok') {
1525 my $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
1526 $name, $vendor, $product, $capacity, $ctrl, $state;
1527 report('storage', $msg, $status2nagios{$status}, $nexus);
# Healthy disk: bus/media prefix, capacity and, if defined, the spare role.
1531 my $msg = sprintf '%s [%s%s] on ctrl %d is %s',
1532 $name, $busmedia, $capacity, $ctrl, $state;
1533 if (defined $spare) { $msg .= " ($spare)"; }
1534 report('storage', $msg, $E_OK, $nexus);
1541 #-----------------------------------------
1542 # STORAGE: Check logical drives
1543 #-----------------------------------------
# Reports the state of every virtual (logical) disk via
# report('storage', ...). Records come either from the SNMP virtual disk
# table or from "omreport storage vdisk controller=N" for each entry in
# @controllers. Returns early when @controllers is empty, when 'vdisk'
# is blacklisted for 'all', or when the SNMP query yields no result.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
1544 sub check_virtual_disks {
1545 return if $#controllers == -1;
1546 return if blacklisted('vdisk', 'all');
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %vdisk_oid).
1563 '1.3.6.1.4.1.674.10893.1.20.140.1.1.3' => 'virtualDiskDeviceName',
1564 '1.3.6.1.4.1.674.10893.1.20.140.1.1.4' => 'virtualDiskState',
1565 '1.3.6.1.4.1.674.10893.1.20.140.1.1.6' => 'virtualDiskLengthInMB',
1566 '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
1567 '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
1568 '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
1569 '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
# Whole-table fetch when $opt{use_get_table} is set.
1572 if ($opt{use_get_table}) {
1573 my $virtualDiskTable = '1.3.6.1.4.1.674.10893.1.20.140.1';
1574 $result = $snmp_session->get_table(-baseoid => $virtualDiskTable);
# Column-wise fetch of only the OIDs that are keys of %vdisk_oid.
1577 $result = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);
1580 # No logical drives is OK
1581 return if !defined $result;
1583 @output = @{ get_snmp_output($result, \%vdisk_oid) };
# omreport path: one query per controller; map_item() is called with
# 'ctrl' and the controller id so each record can be read back as
# $out->{ctrl} below.
1586 foreach my $c (@controllers) {
1587 push @output, @{ run_omreport("storage vdisk controller=$c") };
1588 map_item('ctrl', $c, \@output);
# Numeric code => text tables (partial in this listing); presumably the
# entries of %vdisk_state and %vdisk_layout, which are looked up below.
1601 16 => 'Regenerating',
1604 32 => 'Reconstructing',
1605 35 => 'Initializing',
1606 36 => 'Background Initialization',
1607 38 => 'Resynching Paused',
1608 52 => 'Permanently Degraded',
1609 54 => 'Degraded Redundancy',
1614 1 => 'Concatenated',
1621 19 => 'Concatenated RAID-1',
1625 # Check virtual disks on each of the controllers
1627 foreach my $out (@output) {
# --- values taken from an SNMP record ---
1629 $id = $out->{virtualDiskTargetID};
1630 $dev = $out->{virtualDiskDeviceName};
1631 $state = get_hashval($out->{virtualDiskState}, \%vdisk_state);
1632 $layout = get_hashval($out->{virtualDiskLayout}, \%vdisk_layout);
1633 $status = $snmp_status{$out->{virtualDiskComponentStatus}};
# virtualDiskLengthInMB divided by 1024, shown with two decimals as GB.
1634 $size = sprintf '%.2f GB', $out->{virtualDiskLengthInMB} / 1024;
1635 $progr = q{}; # can't get this from SNMP(?)
1636 $nexus = convert_nexus($out->{virtualDiskNexusID});
# --- values taken from an omreport record ---
1640 $dev = $out->{'Device Name'};
1641 $state = $out->{State};
1642 $status = $out->{Status};
1643 $layout = $out->{Layout};
1644 $size = $out->{Size};
1645 $progr = ' [' . $out->{Progress} . ']';
# Cut the size text after its last "GB".
1646 $size =~ s{\A (.*GB).* \z}{$1}xms;
1647 $nexus = join q{:}, $out->{ctrl}, $id;
1648 $ctrl = $out->{ctrl};
1651 next VDISK if blacklisted('vdisk', $nexus);
1654 # The device name is undefined sometimes
1655 $dev = q{} if !defined $dev;
1657 # Special case: Regenerating
1658 if ($state eq 'Regenerating') {
1659 my $msg = sprintf q{Logical Drive '%s' [%s, %s] is %s%s},
1660 $dev, $layout, $size, $state, $progr;
1661 report('storage', $msg, $E_WARNING, $nexus);
# Any other status that is not 'Ok' is reported with the Nagios level
# that %status2nagios gives for it.
1664 elsif ($status ne 'Ok') {
1665 my $msg = sprintf q{Logical Drive '%s' [%s, %s] needs attention: %s},
1666 $dev, $layout, $size, $state;
1667 report('storage', $msg, $status2nagios{$status}, $nexus);
# Healthy logical drive.
1671 my $msg = sprintf q{Logical Drive '%s' [%s, %s] is %s},
1672 $dev, $layout, $size, $state;
1673 report('storage', $msg, $E_OK, $nexus);
1680 #-----------------------------------------
1681 # STORAGE: Check cache batteries
1682 #-----------------------------------------
# Reports the state of the controller cache batteries via
# report('storage', ...). Records come either from the SNMP battery
# table (plus the battery connection table) or from
# "omreport storage battery controller=N" for each entry in
# @controllers. Returns early when @controllers is empty, when 'bat' is
# blacklisted for 'all', or when the SNMP query yields no result.
# The charging, learning, low-power and degraded/non-critical states
# are warnings that the 'bat_charge' blacklist can suppress.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
1683 sub check_cache_battery {
1684 return if $#controllers == -1;
1685 return if blacklisted('bat', 'all');
1692 my $learn = undef; # learn state
1693 my $pred = undef; # battery's ability to be charged
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %bat_oid).
1699 '1.3.6.1.4.1.674.10893.1.20.130.15.1.4' => 'batteryState',
1700 '1.3.6.1.4.1.674.10893.1.20.130.15.1.6' => 'batteryComponentStatus',
1701 '1.3.6.1.4.1.674.10893.1.20.130.15.1.9' => 'batteryNexusID',
1702 '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
1703 '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
1704 '1.3.6.1.4.1.674.10893.1.20.130.16.1.5' => 'batteryConnectionControllerNumber',
# With $opt{use_get_table} set, fetch the battery table and copy the
# connection table's entries into the same result.
1707 if ($opt{use_get_table}) {
1708 my $batteryTable = '1.3.6.1.4.1.674.10893.1.20.130.15';
1709 my $batteryConnectionTable = '1.3.6.1.4.1.674.10893.1.20.130.16';
1711 $result = $snmp_session->get_table(-baseoid => $batteryTable);
1712 my $ext = $snmp_session->get_table(-baseoid => $batteryConnectionTable);
1714 if (defined $result) {
1715 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
# Column-wise fetch of only the OIDs that are keys of %bat_oid.
1719 $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
1722 # No cache battery is OK
1723 return if !defined $result;
1725 @output = @{ get_snmp_output($result, \%bat_oid) };
# omreport path: one query per controller; map_item() is called with
# 'ctrl' and the controller id so each record can be read back as
# $out->{ctrl} below.
1728 foreach my $c (@controllers) {
1729 push @output, @{ run_omreport("storage battery controller=$c") };
1730 map_item('ctrl', $c, \@output);
# Numeric code => text tables (partial in this listing); presumably the
# entries of %bat_state, %bat_learn_state and %bat_pred_cap, which are
# looked up below.
1740 7 => 'Reconditioning',
1748 # Specifies the learn state activity of the battery
1758 # This property displays the battery's ability to be charged
1761 1 => 'Failed', # The battery cannot be charged and needs to be replaced
1762 2 => 'Ready', # The battery can be charged to full capacity
1763 4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
1764 # battery cannot be determined until the Learn cycle is complete
1767 # Check battery on each of the controllers
1769 foreach my $out (@output) {
# --- values taken from an SNMP record ---
1771 $status = $snmp_status{$out->{batteryComponentStatus}};
1772 $state = get_hashval($out->{batteryState}, \%bat_state);
1773 $learn = get_hashval($out->{batteryLearnState}, \%bat_learn_state);
1774 $pred = get_hashval($out->{batteryPredictedCapacity}, \%bat_pred_cap);
# The SNMP controller number is decremented by one (presumably to match
# the controller numbering used on the omreport path; confirm).
1775 $ctrl = $out->{batteryConnectionControllerNumber} - 1;
1776 $nexus = convert_nexus($out->{batteryNexusID});
# Reduce an "N:M" id to its second number.
1778 $id =~ s{\A \d+:(\d+) \z}{$1}xms;
# --- values taken from an omreport record ---
1782 $state = $out->{'State'};
1783 $status = $out->{'Status'};
1784 $learn = $out->{'Learn State'};
1785 $pred = $out->{'Predicted Capacity Status'};
1786 $ctrl = $out->{'ctrl'};
1787 $nexus = join q{:}, $out->{ctrl}, $id;
1790 next BATTERY if blacklisted('bat', $nexus);
1792 # Special case: Charging
1793 if ($state eq 'Charging') {
# A charging battery whose predicted capacity is 'Failed' is critical.
1794 if ($pred eq 'Failed') {
1795 my $msg = sprintf 'Cache Battery %d in controller %d is %s (%s) [replace battery]',
1796 $id, $ctrl, $state, $pred;
1797 report('storage', $msg, $E_CRITICAL, $nexus);
1800 next BATTERY if blacklisted('bat_charge', $nexus);
1801 my $msg = sprintf 'Cache Battery %d in controller %d is %s (%s) [probably harmless]',
1802 $id, $ctrl, $state, $pred;
1803 report('storage', $msg, $E_WARNING, $nexus);
1806 # Special case: Learning (battery learns its capacity)
1807 elsif ($state eq 'Learning') {
# A learn state of 'Failed' is critical.
1808 if ($learn eq 'Failed') {
1809 my $msg = sprintf 'Cache Battery %d in controller %d is %s (%s)',
1810 $id, $ctrl, $state, $learn;
1811 report('storage', $msg, $E_CRITICAL, $nexus);
1814 next BATTERY if blacklisted('bat_charge', $nexus);
1815 my $msg = sprintf 'Cache Battery %d in controller %d is %s (%s) [probably harmless]',
1816 $id, $ctrl, $state, $learn;
1817 report('storage', $msg, $E_WARNING, $nexus);
1820 # Special case: Power Low (first part of recharge cycle)
1821 elsif ($state eq 'Power Low') {
1822 next BATTERY if blacklisted('bat_charge', $nexus);
1823 my $msg = sprintf 'Cache Battery %d in controller %d is %s [probably harmless]',
1825 report('storage', $msg, $E_WARNING, $nexus);
1827 # Special case: Degraded and Non-Critical (usually part of recharge cycle)
1828 elsif ($state eq 'Degraded' && $status eq 'Non-Critical') {
1829 next BATTERY if blacklisted('bat_charge', $nexus);
1830 my $msg = sprintf 'Cache Battery %d in controller %d is %s (%s) [probably harmless]',
1831 $id, $ctrl, $state, $status;
1832 report('storage', $msg, $E_WARNING, $nexus);
# Any other status that is not 'Ok' is reported with the Nagios level
# that %status2nagios gives for it.
1835 elsif ($status ne 'Ok') {
1836 my $msg = sprintf 'Cache Battery %d in controller %d needs attention: %s (%s)',
1837 $id, $ctrl, $state, $status;
1838 report('storage', $msg, $status2nagios{$status}, $nexus);
# Healthy battery.
1842 my $msg = sprintf 'Cache Battery %d in controller %d is %s',
1844 report('storage', $msg, $E_OK, $nexus);
1851 #-----------------------------------------
1852 # STORAGE: Check connectors (channels)
1853 #-----------------------------------------
# Reports the state of every storage connector (channel) via
# report('storage', ...). Records come either from the SNMP channel
# table or from "omreport storage connector controller=N" for each
# entry in @controllers. Returns early when @controllers is empty or
# when 'conn' is blacklisted for 'all'. Every connector is reported with
# the Nagios level that %status2nagios gives for its status.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
1854 sub check_connectors {
1855 return if $#controllers == -1;
1856 return if blacklisted('conn', 'all');
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %conn_oid).
1870 '1.3.6.1.4.1.674.10893.1.20.130.2.1.1' => 'channelNumber',
1871 '1.3.6.1.4.1.674.10893.1.20.130.2.1.2' => 'channelName',
1872 '1.3.6.1.4.1.674.10893.1.20.130.2.1.3' => 'channelState',
1873 '1.3.6.1.4.1.674.10893.1.20.130.2.1.8' => 'channelComponentStatus',
1874 '1.3.6.1.4.1.674.10893.1.20.130.2.1.9' => 'channelNexusID',
1875 '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
# Whole-table fetch when $opt{use_get_table} is set.
1878 if ($opt{use_get_table}) {
1879 my $channelTable = '1.3.6.1.4.1.674.10893.1.20.130.2';
1880 $result = $snmp_session->get_table(-baseoid => $channelTable);
# Column-wise fetch of only the OIDs that are keys of %conn_oid.
1883 $result = $snmp_session->get_entries(-columns => [keys %conn_oid]);
# An undefined result is handled as an SNMP error: print the session's
# error text and close the session.
1886 if (!defined $result) {
1887 printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
1888 $snmp_session->close;
1892 @output = @{ get_snmp_output($result, \%conn_oid) };
# omreport path: one query per controller; map_item() is called with
# 'ctrl' and the controller id so each record can be read back as
# $out->{ctrl} below.
1895 foreach my $c (@controllers) {
1896 push @output, @{ run_omreport("storage connector controller=$c") };
1897 map_item('ctrl', $c, \@output);
# Bus type code => text (partial in this listing); presumably an entry
# of %conn_bustype, which is looked up below.
1915 3 => 'Fibre Channel',
1922 # Check connectors on each of the controllers
1924 foreach my $out (@output) {
# --- values taken from an SNMP record ---
# The SNMP channel number is decremented by one to form the id.
1926 $id = $out->{channelNumber} - 1;
1927 $name = $out->{channelName};
1928 $status = $snmp_status{$out->{channelComponentStatus}};
1929 $state = get_hashval($out->{channelState}, \%conn_state);
1930 $type = get_hashval($out->{channelBusType}, \%conn_bustype);
1931 $nexus = convert_nexus($out->{channelNexusID});
# Reduce an "N:M" value in $ctrl to its first number.
1933 $ctrl =~ s{(\d+):\d+}{$1}xms;
1934 # workaround for ancient OMSA versions
1935 if (! defined $type) { $type = 'n/a'; }
# --- values taken from an omreport record ---
1939 $name = $out->{'Name'};
1940 $state = $out->{'State'};
1941 $status = $out->{'Status'};
1942 $type = $out->{'Connector Type'};
1943 $ctrl = $out->{ctrl};
1944 $nexus = join q{:}, $out->{ctrl}, $id;
1947 next CHANNEL if blacklisted('conn', $nexus);
1949 my $msg = sprintf '%s [%s] on controller %d is %s',
1950 $name, $type, $ctrl, $state;
1951 report('storage', $msg, $status2nagios{$status}, $nexus);
1957 #-----------------------------------------
1958 # STORAGE: Check enclosures
1959 #-----------------------------------------
# Reports the state of every storage enclosure via
# report('storage', ...). Records come either from the SNMP enclosure
# table or from "omreport storage enclosure controller=N" for each
# entry in @controllers. Returns early when 'encl' is blacklisted for
# 'all' or when the SNMP query yields no result.
# Side effects: fills %snmp_enclosure (keyed by the SNMP enclosure
# number) and pushes onto @enclosures, both of which the enclosure fan
# and power supply checks read, and records id, name and firmware per
# enclosure in %sysinfo. These are stored before the per-enclosure
# blacklist test.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
1960 sub check_enclosures {
1961 return if blacklisted('encl', 'all');
1968 my $firmware = undef;
1970 my $occupied_slots = undef; # number of occupied slots
1971 my $total_slots = undef; # number of total slots
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %encl_oid).
1977 '1.3.6.1.4.1.674.10893.1.20.130.3.1.1' => 'enclosureNumber',
1978 '1.3.6.1.4.1.674.10893.1.20.130.3.1.2' => 'enclosureName',
1979 '1.3.6.1.4.1.674.10893.1.20.130.3.1.4' => 'enclosureState',
1980 '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
1981 '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
1982 '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
1983 '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
1984 '1.3.6.1.4.1.674.10893.1.20.130.3.1.31' => 'enclosureOccupiedSlotCount', # new in OMSA 6.3.0
1985 '1.3.6.1.4.1.674.10893.1.20.130.3.1.32' => 'enclosureTotalSlots', # new in OMSA 6.3.0
# Whole-table fetch when $opt{use_get_table} is set.
1988 if ($opt{use_get_table}) {
1989 my $enclosureTable = '1.3.6.1.4.1.674.10893.1.20.130.3';
1990 $result = $snmp_session->get_table(-baseoid => $enclosureTable);
# Column-wise fetch of only the OIDs that are keys of %encl_oid.
1993 $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);
1996 # No enclosures is OK
1997 return if !defined $result;
1999 @output = @{ get_snmp_output($result, \%encl_oid) };
# omreport path: one query per controller; map_item() is called with
# 'ctrl' and the controller id so each record can be read back as
# $out->{ctrl} below.
2002 foreach my $c (@controllers) {
2003 push @output, @{ run_omreport("storage enclosure controller=$c") };
2004 map_item('ctrl', $c, \@output);
2019 foreach my $out (@output) {
# --- values taken from an SNMP record ---
# The SNMP enclosure number is decremented by one to form the id.
2021 $id = $out->{enclosureNumber} - 1;
2022 $name = $out->{enclosureName};
2023 $state = get_hashval($out->{enclosureState}, \%encl_state);
2024 $status = $snmp_status{$out->{enclosureComponentStatus}};
2025 $firmware = exists $out->{enclosureFirmwareVersion}
2026 ? $out->{enclosureFirmwareVersion} : 'N/A';
2027 $nexus = convert_nexus($out->{enclosureNexusID});
# Reduce an "N:..." value in $ctrl to its leading number.
2029 $ctrl =~ s{\A (\d+):.* \z}{$1}xms;
2030 # for the next two, a value of 9999 means feature not available
2031 $occupied_slots = exists $out->{enclosureOccupiedSlotCount}
2032 && $out->{enclosureOccupiedSlotCount} != 9999
2033 ? $out->{enclosureOccupiedSlotCount} : undef;
2034 $total_slots = exists $out->{enclosureTotalSlots}
2035 && $out->{enclosureTotalSlots} != 9999
2036 ? $out->{enclosureTotalSlots} : undef;
# --- values taken from an omreport record ---
2040 $name = $out->{Name};
2041 $state = $out->{State};
2042 $status = $out->{Status};
2043 $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
2044 ? $out->{'Firmware Version'} : 'N/A';
2045 $nexus = join q{:}, $out->{ctrl}, $id;
2046 $ctrl = $out->{ctrl};
2049 $name =~ s{\s+\z}{}xms; # remove trailing whitespace
2050 $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace
2052 # store enclosure data for future use
2054 $snmp_enclosure{$out->{enclosureNumber}}{id} = $id;
2055 $snmp_enclosure{$out->{enclosureNumber}}{name} = $name;
2056 $snmp_enclosure{$out->{enclosureNumber}}{nexus} = $nexus;
2059 push @enclosures, { 'id' => $id,
2060 'ctrl' => $out->{ctrl},
2064 # Collecting some storage info
2065 $sysinfo{'enclosure'}{$nexus}{'id'} = $nexus;
2066 $sysinfo{'enclosure'}{$nexus}{'name'} = $name;
2067 $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;
2069 next ENCLOSURE if blacklisted('encl', $nexus);
# Slot usage is included in the message only when both counters are
# defined.
# NOTE(review): the two messages differ in wording ("on ctrl" versus
# "on controller"); confirm whether that is intended.
2072 if (defined $occupied_slots && defined $total_slots) {
2073 $msg = sprintf 'Enclosure %s [%s, %d/%d slots occupied] on ctrl %d is %s',
2074 $nexus, $name, $occupied_slots, $total_slots, $ctrl, $state;
2077 $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
2078 $nexus, $name, $ctrl, $state;
2080 report('storage', $msg, $status2nagios{$status}, $nexus);
2086 #-----------------------------------------
2087 # STORAGE: Check enclosure fans
2088 #-----------------------------------------
# Reports the state of every enclosure fan via report('storage', ...).
# Records come either from the SNMP fan table (plus the fan connection
# table) or from "omreport storage enclosure ... info=fans" for each
# entry in @enclosures. Returns early when @controllers is empty, when
# 'encl_fan' is blacklisted for 'all', or when the SNMP query yields no
# result. The SNMP path reads %snmp_enclosure and the omreport path
# reads @enclosures; both are filled by check_enclosures.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
2089 sub check_enclosure_fans {
2090 return if $#controllers == -1;
2091 return if blacklisted('encl_fan', 'all');
2099 my $encl_id = undef;
2100 my $encl_name = undef;
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %fan_oid).
2106 '1.3.6.1.4.1.674.10893.1.20.130.7.1.1' => 'fanNumber',
2107 '1.3.6.1.4.1.674.10893.1.20.130.7.1.2' => 'fanName',
2108 '1.3.6.1.4.1.674.10893.1.20.130.7.1.4' => 'fanState',
2109 '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
2110 '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
2111 '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
2112 '1.3.6.1.4.1.674.10893.1.20.130.8.1.4' => 'fanConnectionEnclosureName',
2113 '1.3.6.1.4.1.674.10893.1.20.130.8.1.5' => 'fanConnectionEnclosureNumber',
# With $opt{use_get_table} set, fetch the fan table and copy the
# connection table's entries into the same result.
2116 if ($opt{use_get_table}) {
2117 my $fanTable = '1.3.6.1.4.1.674.10893.1.20.130.7';
2118 my $fanConnectionTable = '1.3.6.1.4.1.674.10893.1.20.130.8';
2120 $result = $snmp_session->get_table(-baseoid => $fanTable);
2121 my $ext = $snmp_session->get_table(-baseoid => $fanConnectionTable);
2123 if (defined $result) {
2124 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
# Column-wise fetch of only the OIDs that are keys of %fan_oid.
2128 $result = $snmp_session->get_entries(-columns => [keys %fan_oid]);
2131 # No enclosure fans is OK
2132 return if !defined $result;
2134 @output = @{ get_snmp_output($result, \%fan_oid) };
# omreport path: one query per known enclosure; map_item() is called
# with the controller id, enclosure id and enclosure name so each
# record can be read back as ctrl, encl_id and encl_name below.
2137 foreach my $enc (@enclosures) {
2138 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans") };
2139 map_item('ctrl', $enc->{ctrl}, \@output);
2140 map_item('encl_id', $enc->{id}, \@output);
2141 map_item('encl_name', $enc->{name}, \@output);
2156 # Check fans on each of the enclosures
2158 foreach my $out (@output) {
# --- values taken from an SNMP record ---
# The SNMP fan number is decremented by one to form the id.
2160 $id = $out->{fanNumber} - 1;
2161 $name = $out->{fanName};
2162 $state = get_hashval($out->{fanState}, \%fan_state);
2163 $status = $snmp_status{$out->{fanComponentStatus}};
2164 $speed = $out->{fanProbeCurrValue};
2165 $encl_name = $out->{fanConnectionEnclosureName};
# The enclosure shown in the message is the nexus stored for this
# enclosure number in %snmp_enclosure.
2166 $encl_id = $snmp_enclosure{$out->{fanConnectionEnclosureNumber}}{nexus};
2167 $nexus = convert_nexus($out->{fanNexusID});
# --- values taken from an omreport record ---
2171 $name = $out->{'Name'};
2172 $state = $out->{'State'};
2173 $status = $out->{'Status'};
2174 $speed = $out->{'Speed'};
2175 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2176 $encl_name = $out->{encl_name};
2177 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2180 next FAN if blacklisted('encl_fan', $nexus);
# A status other than 'Ok' is reported with the Nagios level that
# %status2nagios gives for it.
2183 if ($status ne 'Ok') {
2184 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2185 $name, $encl_id, $encl_name, $state;
2186 report('storage', $msg, $status2nagios{$status}, $nexus);
# Healthy fan: the message also carries the speed reading.
2190 my $msg = sprintf '%s in enclosure %s [%s] is %s (speed=%s)',
2191 $name, $encl_id, $encl_name, $state, $speed;
2192 report('storage', $msg, $E_OK, $nexus);
2199 #-----------------------------------------
2200 # STORAGE: Check enclosure power supplies
2201 #-----------------------------------------
# Reports the state of every enclosure power supply via
# report('storage', ...). Records come either from the SNMP power
# supply table (plus the power supply connection table) or from
# "omreport storage enclosure ... info=pwrsupplies" for each entry in
# @enclosures. Returns early when @controllers is empty, when 'encl_ps'
# is blacklisted for 'all', or when the SNMP query yields no result.
# The SNMP path reads %snmp_enclosure and the omreport path reads
# @enclosures; both are filled by check_enclosures.
# NOTE(review): this listing omits some lines of the sub (the embedded
# line numbers skip), so the conditions that select between the SNMP
# and omreport paths are not visible here.
2202 sub check_enclosure_pwr {
2203 return if $#controllers == -1;
2204 return if blacklisted('encl_ps', 'all');
2211 my $encl_id = undef;
2212 my $encl_name = undef;
# OID => column name. The names on the right are the keys read from each
# SNMP record further down (presumably the entries of %ps_oid).
2218 '1.3.6.1.4.1.674.10893.1.20.130.9.1.1' => 'powerSupplyNumber',
2219 '1.3.6.1.4.1.674.10893.1.20.130.9.1.2' => 'powerSupplyName',
2220 '1.3.6.1.4.1.674.10893.1.20.130.9.1.4' => 'powerSupplyState',
2221 '1.3.6.1.4.1.674.10893.1.20.130.9.1.9' => 'powerSupplyComponentStatus',
2222 '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
2223 '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
2224 '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
# With $opt{use_get_table} set, fetch the power supply table and copy
# the connection table's entries into the same result.
2227 if ($opt{use_get_table}) {
2228 my $powerSupplyTable = '1.3.6.1.4.1.674.10893.1.20.130.9';
2229 my $powerSupplyConnectionTable = '1.3.6.1.4.1.674.10893.1.20.130.10';
2231 $result = $snmp_session->get_table(-baseoid => $powerSupplyTable);
2232 my $ext = $snmp_session->get_table(-baseoid => $powerSupplyConnectionTable);
2234 if (defined $result) {
2235 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
# Column-wise fetch of only the OIDs that are keys of %ps_oid.
2239 $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
2242 # No enclosure power supplies is OK
2243 return if !defined $result;
2245 @output = @{ get_snmp_output($result, \%ps_oid) };
# omreport path: one query per known enclosure; map_item() is called
# with the controller id, enclosure id and enclosure name so each
# record can be read back as ctrl, encl_id and encl_name below.
2248 foreach my $enc (@enclosures) {
2249 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies") };
2250 map_item('ctrl', $enc->{ctrl}, \@output);
2251 map_item('encl_id', $enc->{id}, \@output);
2252 map_item('encl_name', $enc->{name}, \@output);
# State code => text (partial in this listing); presumably an entry of
# %ps_state, which is looked up below.
2261 5 => 'Not Installed',
2267 # Check power supplies on each of the enclosures
2269 foreach my $out (@output) {
# --- values taken from an SNMP record ---
# NOTE(review): unlike the fan and enclosure checks, the SNMP number is
# used as the id without subtracting one; confirm this is intended.
2271 $id = $out->{powerSupplyNumber};
2272 $name = $out->{powerSupplyName};
2273 $state = get_hashval($out->{powerSupplyState}, \%ps_state);
2274 $status = $snmp_status{$out->{powerSupplyComponentStatus}};
# The enclosure shown in the message is the nexus stored for this
# enclosure number in %snmp_enclosure.
2275 $encl_id = $snmp_enclosure{$out->{powerSupplyConnectionEnclosureNumber}}{nexus};
2276 $encl_name = $out->{powerSupplyConnectionEnclosureName};
2277 $nexus = convert_nexus($out->{powerSupplyNexusID});
# --- values taken from an omreport record ---
2281 $name = $out->{'Name'};
2282 $state = $out->{'State'};
2283 $status = $out->{'Status'};
2284 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2285 $encl_name = $out->{encl_name};
2286 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2289 next PS if blacklisted('encl_ps', $nexus);
# A status other than 'Ok' is reported with the Nagios level that
# %status2nagios gives for it.
2292 if ($status ne 'Ok') {
2293 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2294 $name, $encl_id, $encl_name, $state;
2295 report('storage', $msg, $status2nagios{$status}, $nexus);
# Healthy power supply.
2299 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2300 $name, $encl_id, $encl_name, $state;
2301 report('storage', $msg, $E_OK, $nexus);
2308 #-----------------------------------------
2309 # STORAGE: Check enclosure temperatures
2310 #-----------------------------------------
# Reports the state of every enclosure temperature probe through
# report(). Probe data is read either from SNMP ($snmp_session and
# get_snmp_output()) or from "omreport storage enclosure ... info=temps",
# which is run once per entry in @enclosures. Returns early, without
# reporting anything, when @controllers is empty, when 'encl_temp' is
# blacklisted as a whole, or when the SNMP request yields no result.
2311 sub check_enclosure_temp {
2312 return if $#controllers == -1;
2313 return if blacklisted('encl_temp', 'all');
2320 my $reading = undef;
2322 my $max_warn = undef;
2323 my $max_crit = undef;
2324 my $min_warn = undef;
2325 my $min_crit = undef;
2326 my $encl_id = undef;
2327 my $encl_name = undef;
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
2333 '1.3.6.1.4.1.674.10893.1.20.130.11.1.1' => 'temperatureProbeNumber',
2334 '1.3.6.1.4.1.674.10893.1.20.130.11.1.2' => 'temperatureProbeName',
2335 '1.3.6.1.4.1.674.10893.1.20.130.11.1.4' => 'temperatureProbeState',
2336 '1.3.6.1.4.1.674.10893.1.20.130.11.1.6' => 'temperatureProbeUnit',
2337 '1.3.6.1.4.1.674.10893.1.20.130.11.1.7' => 'temperatureProbeMinWarning',
2338 '1.3.6.1.4.1.674.10893.1.20.130.11.1.8' => 'temperatureProbeMinCritical',
2339 '1.3.6.1.4.1.674.10893.1.20.130.11.1.9' => 'temperatureProbeMaxWarning',
2340 '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
2341 '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
2342 '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
2343 '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
2344 '1.3.6.1.4.1.674.10893.1.20.130.12.1.4' => 'temperatureConnectionEnclosureName',
2345 '1.3.6.1.4.1.674.10893.1.20.130.12.1.5' => 'temperatureConnectionEnclosureNumber',
# With $opt{use_get_table} set, both tables are fetched whole and the
# connection table entries are copied into the probe table result;
# otherwise only the columns listed in %temp_oid are requested.
2348 if ($opt{use_get_table}) {
2349 my $temperatureProbeTable = '1.3.6.1.4.1.674.10893.1.20.130.11';
2350 my $temperatureConnectionTable = '1.3.6.1.4.1.674.10893.1.20.130.12';
2352 $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);
2353 my $ext = $snmp_session->get_table(-baseoid => $temperatureConnectionTable);
2355 if (defined $result) {
2356 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
2360 $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);
2363 # No enclosure temperature probes is OK
2364 return if !defined $result;
2366 @output = @{ get_snmp_output($result, \%temp_oid) };
# omreport variant: collect the probe list of each enclosure and tag
# the collected entries with controller id, enclosure id and name.
2369 foreach my $enc (@enclosures) {
2370 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
2371 map_item('ctrl', $enc->{ctrl}, \@output);
2372 map_item('encl_id', $enc->{id}, \@output);
2373 map_item('encl_name', $enc->{name}, \@output);
2388 # Check temperature probes on each of the enclosures
2390 foreach my $out (@output) {
# SNMP fields. The probe number is shifted down by one to form the id;
# readings and thresholds that are absent become '[N/A]'.
2392 $id = $out->{temperatureProbeNumber} - 1;
2393 $name = $out->{temperatureProbeName};
2394 $state = get_hashval($out->{temperatureProbeState}, \%temp_state);
2395 $status = $snmp_status{$out->{temperatureProbeComponentStatus}};
2396 $unit = $out->{temperatureProbeUnit};
2397 $reading = exists $out->{temperatureProbeCurValue}
2398 ? $out->{temperatureProbeCurValue} : '[N/A]';
2399 $max_warn = exists $out->{temperatureProbeMaxWarning}
2400 ? $out->{temperatureProbeMaxWarning} : '[N/A]';
2401 $max_crit = exists $out->{temperatureProbeMaxCritical}
2402 ? $out->{temperatureProbeMaxCritical} : '[N/A]';
2403 $min_warn = exists $out->{temperatureProbeMinWarning}
2404 ? $out->{temperatureProbeMinWarning} : '[N/A]';
2405 $min_crit = exists $out->{temperatureProbeMinCritical}
2406 ? $out->{temperatureProbeMinCritical} : '[N/A]';
2407 $encl_id = $snmp_enclosure{$out->{temperatureConnectionEnclosureNumber}}{nexus};
2408 $encl_name = $out->{temperatureConnectionEnclosureName};
2409 $nexus = convert_nexus($out->{temperatureProbeNexusID});
# omreport fields. The nexus is built as controller:enclosure:probe.
2413 $name = $out->{'Name'};
2414 $state = $out->{'State'};
2415 $status = $out->{'Status'};
2417 $reading = $out->{'Reading'};
2418 $max_warn = $out->{'Maximum Warning Threshold'};
2419 $max_crit = $out->{'Maximum Failure Threshold'};
2420 $min_warn = $out->{'Minimum Warning Threshold'};
2421 $min_crit = $out->{'Minimum Failure Threshold'};
2422 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2423 $encl_name = $out->{encl_name};
2424 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2427 next TEMP if blacklisted('encl_temp', $nexus);
2429 # Make sure these values are integers
# Each value is reduced to a bare (possibly negative) integer, with an
# optional trailing "C" removed; anything else becomes '[N/A]'.
2430 $reading =~ s{\A \s* (-?\d+) \s* C? \s* \z}{$1}xms or $reading = '[N/A]';
2431 $max_warn =~ s{\A \s* (-?\d+) \s* C? \s* \z}{$1}xms or $max_warn = '[N/A]';
2432 $max_crit =~ s{\A \s* (-?\d+) \s* C? \s* \z}{$1}xms or $max_crit = '[N/A]';
2433 $min_warn =~ s{\A \s* (-?\d+) \s* C? \s* \z}{$1}xms or $min_warn = '[N/A]';
2434 $min_crit =~ s{\A \s* (-?\d+) \s* C? \s* \z}{$1}xms or $min_crit = '[N/A]';
2436 # Inactive temp probes
2437 if ($status eq 'Unknown' and $state eq 'Inactive') {
2438 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2439 $name, $encl_id, $encl_name, $state;
2440 report('storage', $msg, $E_OK, $nexus);
# The four threshold branches below require a numeric reading. A
# missing reading ('[N/A]') would numify to 0 in the comparisons and
# could be misreported as "too low at 0 C"; such probes are left to
# the generic non-OK branch further down, which copes with a reading
# that is absent or not an integer.
2442 elsif ($status ne 'Ok' and $reading ne '[N/A]' and $max_crit ne '[N/A]' and $reading > $max_crit) {
2443 my $msg = sprintf '%s in enclosure %s [%s] is critically high at %d C',
2444 $name, $encl_id, $encl_name, $reading;
2445 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2446 report('chassis', $msg, $err, $nexus);
2448 elsif ($status ne 'Ok' and $reading ne '[N/A]' and $max_warn ne '[N/A]' and $reading > $max_warn) {
2449 my $msg = sprintf '%s in enclosure %s [%s] is too high at %d C',
2450 $name, $encl_id, $encl_name, $reading;
2451 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2452 report('chassis', $msg, $err, $nexus);
2454 elsif ($status ne 'Ok' and $reading ne '[N/A]' and $min_crit ne '[N/A]' and $reading < $min_crit) {
2455 my $msg = sprintf '%s in enclosure %s [%s] is critically low at %d C',
2456 $name, $encl_id, $encl_name, $reading;
2457 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2458 report('chassis', $msg, $err, $nexus);
2460 elsif ($status ne 'Ok' and $reading ne '[N/A]' and $min_warn ne '[N/A]' and $reading < $min_warn) {
2461 my $msg = sprintf '%s in enclosure %s [%s] is too low at %d C',
2462 $name, $encl_id, $encl_name, $reading;
2463 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2464 report('chassis', $msg, $err, $nexus);
# Any other non-OK probe: report its state, plus reading and thresholds
# when the reading is an integer.
2467 elsif ($status ne 'Ok') {
2468 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2469 $name, $encl_id, $encl_name, $state;
2470 if (defined $reading && $reading =~ m{\A -?\d+ \z}xms) {
2471 # take into account that with certain states the
2472 # reading doesn't exist or is not an integer
2473 $msg .= sprintf ' at %s C', $reading;
2474 if ($min_warn eq '[N/A]' or $min_crit eq '[N/A]') {
2475 $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
2478 $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
2479 $min_warn, $min_crit, $max_warn, $max_crit;
2482 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2483 report('storage', $msg, $err, $nexus);
# Probe status is Ok: informational line, always reported with $E_OK.
2487 my $msg = sprintf '%s in enclosure %s [%s]',
2488 $name, $encl_id, $encl_name;
2489 if (defined $reading && $reading ne '[N/A]') {
2490 # take into account that with certain states the
2491 # reading doesn't exist or is not an integer
2492 $msg .= sprintf ' reads %d C', $reading;
2493 if ($min_warn eq '[N/A]' or $min_crit eq '[N/A]') {
2494 $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
2497 $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
2498 $min_warn, $min_crit, $max_warn, $max_crit;
2502 $msg .= sprintf ' is %s', $state;
2504 report('storage', $msg, $E_OK, $nexus);
2507 # Collect performance data
# Only probes with a numeric reading contribute. "Temperature Probe N"
# is shortened to "temp_N" before the label is assembled.
2508 if (defined $opt{perfdata} && $reading ne '[N/A]') {
2509 $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
2510 my $label = "enclosure_${encl_id}_${name}";
2512 $mini =~ s{enclosure_(.+?)_temp_(.+?)}{e$1t$2}xms;
2526 #-----------------------------------------
2527 # STORAGE: Check enclosure management modules (EMM)
2528 #-----------------------------------------
# Reports the state of every enclosure management module through
# report(). Module data is read either from SNMP ($snmp_session and
# get_snmp_output()) or from "omreport storage enclosure ... info=emms",
# which is run once per entry in @enclosures. Returns early, without
# reporting anything, when @controllers is empty, when 'encl_emm' is
# blacklisted as a whole, or when the SNMP request yields no result.
2529 sub check_enclosure_emms {
2530 return if $#controllers == -1;
2531 return if blacklisted('encl_emm', 'all');
2538 my $encl_id = undef;
2539 my $encl_name = undef;
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
2545 '1.3.6.1.4.1.674.10893.1.20.130.13.1.1' => 'enclosureManagementModuleNumber',
2546 '1.3.6.1.4.1.674.10893.1.20.130.13.1.2' => 'enclosureManagementModuleName',
2547 '1.3.6.1.4.1.674.10893.1.20.130.13.1.4' => 'enclosureManagementModuleState',
2548 '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
2549 '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
2550 '1.3.6.1.4.1.674.10893.1.20.130.14.1.4' => 'enclosureManagementModuleConnectionEnclosureName',
2551 '1.3.6.1.4.1.674.10893.1.20.130.14.1.5' => 'enclosureManagementModuleConnectionEnclosureNumber',
# With $opt{use_get_table} set, both tables are fetched whole and the
# connection table entries are copied into the module table result;
# otherwise only the columns listed in %emms_oid are requested.
2554 if ($opt{use_get_table}) {
2555 my $enclosureManagementModuleTable = '1.3.6.1.4.1.674.10893.1.20.130.13';
2556 my $enclosureManagementModuleConnectionTable = '1.3.6.1.4.1.674.10893.1.20.130.14';
2558 $result = $snmp_session->get_table(-baseoid => $enclosureManagementModuleTable);
2559 my $ext = $snmp_session->get_table(-baseoid => $enclosureManagementModuleConnectionTable);
2561 if (defined $result) {
2562 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
2566 $result = $snmp_session->get_entries(-columns => [keys %emms_oid]);
2569 # No enclosure EMMs is OK
2570 return if !defined $result;
2572 @output = @{ get_snmp_output($result, \%emms_oid) };
# omreport variant: collect the EMM list of each enclosure and tag the
# collected entries with controller id, enclosure id and name.
2575 foreach my $enc (@enclosures) {
2576 push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms") };
2577 map_item('ctrl', $enc->{ctrl}, \@output);
2578 map_item('encl_id', $enc->{id}, \@output);
2579 map_item('encl_name', $enc->{name}, \@output);
2590 5 => 'Not Installed',
2595 # Check EMMs on each of the enclosures
2597 foreach my $out (@output) {
# SNMP fields. The module number is shifted down by one to form the id.
2599 $id = $out->{enclosureManagementModuleNumber} - 1;
2600 $name = $out->{enclosureManagementModuleName};
2601 $state = get_hashval($out->{enclosureManagementModuleState}, \%emms_state);
2602 $status = $snmp_status{$out->{enclosureManagementModuleComponentStatus}};
2603 $encl_id = $snmp_enclosure{$out->{enclosureManagementModuleConnectionEnclosureNumber}}{nexus};
2604 $encl_name = $out->{enclosureManagementModuleConnectionEnclosureName};
2605 $nexus = convert_nexus($out->{enclosureManagementModuleNexusID});
# omreport fields. The nexus is built as controller:enclosure:module.
2609 $name = $out->{'Name'};
2610 $state = $out->{'State'};
2611 $status = $out->{'Status'};
2612 $encl_id = join q{:}, $out->{ctrl}, $out->{'encl_id'};
2613 $encl_name = $out->{encl_name};
2614 $nexus = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
2617 next EMM if blacklisted('encl_emm', $nexus);
# An EMM slot that is simply empty is not a fault. The alternation is
# grouped so that both anchors apply to both words, i.e. the status
# must be exactly 'Other' or exactly 'Unknown'.
2620 if ($status =~ m{\A (?:Other|Unknown) \z}xms and $state eq 'Not Installed') {
2621 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2622 $name, $encl_id, $encl_name, $state;
2623 report('storage', $msg, $E_OK, $nexus);
# Any other non-OK status is reported with the matching Nagios level.
2626 elsif ($status ne 'Ok') {
2627 my $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
2628 $name, $encl_id, $encl_name, $state;
2629 report('storage', $msg, $status2nagios{$status}, $nexus);
# Status is Ok: informational line.
2633 my $msg = sprintf '%s in enclosure %s [%s] is %s',
2634 $name, $encl_id, $encl_name, $state;
2635 report('storage', $msg, $E_OK, $nexus);
2642 #-----------------------------------------
2643 # CHASSIS: Check memory modules
2644 #-----------------------------------------
# Reports the status of every memory module through report() and adds
# each module's size to $count{mem}. Module data is read either from
# SNMP ($snmp_session and get_snmp_output()) or from
# "omreport <chassis> memory". Returns early when 'dimm' is blacklisted
# as a whole.
2646 return if blacklisted('dimm', 'all');
2650 my $location = undef;
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
2659 '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1' => 'memoryDeviceIndex',
2660 '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1' => 'memoryDeviceStatus',
2661 '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1' => 'memoryDeviceLocationName',
2662 '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
2663 '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
# With $opt{use_get_table} set the whole table is fetched; otherwise
# only the columns listed in %dimm_oid are requested.
2666 if ($opt{use_get_table}) {
2667 my $memoryDeviceTable = '1.3.6.1.4.1.674.10892.1.1100.50.1';
2668 $result = $snmp_session->get_table(-baseoid => $memoryDeviceTable);
2671 $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);
# A missing result is treated as an SNMP error here: the session error
# is printed and the session is closed.
2674 if (!defined $result) {
2675 printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
2676 $snmp_session->close;
2680 @output = @{ get_snmp_output($result, \%dimm_oid) };
2683 @output = @{ run_omreport("$omopt_chassis memory") };
2686 # Note: These values are bit masks, so combination values are
2687 # possible. If value is 0 (zero), memory device has no faults.
2690 1 => 'ECC single bit correction warning rate exceeded',
2691 2 => 'ECC single bit correction failure rate exceeded',
2692 4 => 'ECC multibit fault encountered',
2693 8 => 'ECC single bit correction logging disabled',
2694 16 => 'device disabled because of spare activation',
2698 foreach my $out (@output) {
2699 @failures = (); # Initialize
# SNMP fields. The size is divided by 1024 and labelled MB (presumably
# the agent reports KB -- confirm against the MIB).
2701 $index = $out->{memoryDeviceIndex};
2702 $status = $snmp_status{$out->{memoryDeviceStatus}};
2703 $location = $out->{memoryDeviceLocationName};
2704 $size = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
2705 $modes = $out->{memoryDeviceFailureModes};
# Decode the failure bit mask. The masks are sorted numerically so the
# failure texts are listed in ascending bit order; a plain sort would
# compare them as strings and put 16 before 2.
2707 foreach my $mask (sort { $a <=> $b } keys %failure_mode) {
2708 if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; }
# omreport fields. An unoccupied slot gets an undefined index.
2713 $index = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
2714 $status = $out->{'Status'};
2715 $location = $out->{'Connector Name'};
2716 $size = $out->{'Size'};
2717 if (defined $size) {
2718 $size =~ s{\s\s}{ }gxms;
2720 # Run 'omreport chassis memory index=X' to get the failures
2721 if ($status ne 'Ok' && defined $index) {
2722 foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
2723 if (m/\A Failures; (.+?) \z/xms) {
# chop drops the last character of the failure text (presumably a
# trailing period -- confirm) before it is split on periods.
2724 chop(my $fail = $1);
2725 push @failures, split m{\.}xms, $fail;
# Trim leading and trailing whitespace from the location.
2730 $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;
2732 # calculate total memory
2733 my $msize = defined $size ? $size : 0;
2734 $msize =~ s{\A (\d+) \s MB}{$1}xms;
2735 $count{mem} += $msize;
2737 # Ignore empty memory slots. This is tested before the blacklist
# lookup so that blacklisted() is never handed an undefined index.
2739 next DIMM if !defined $index;
2740 next DIMM if blacklisted('dimm', $index);
# Non-OK modules are reported with the matching Nagios level, with the
# decoded failure texts when there are any and the bare status otherwise.
2743 if ($status ne 'Ok') {
2745 if (scalar @failures == 0) {
2746 $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
2747 $index, $location, $size, $status;
2750 $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
2751 $index, $location, $size, (join q{, }, @failures);
2754 report('chassis', $msg, $status2nagios{$status}, $index);
# Status is Ok: informational line.
2758 my $msg = sprintf 'Memory module %d [%s, %s] is %s',
2759 $index, $location, $size, $status;
2760 report('chassis', $msg, $E_OK, $index);
2767 #-----------------------------------------
2768 # CHASSIS: Check fans
2769 #-----------------------------------------
# Reports the status of every chassis fan through report() and, when
# $opt{perfdata} is defined, prepares a performance data label per fan.
# Fan data is read either from SNMP ($snmp_session and
# get_snmp_output()) or from "omreport <chassis> fans". Returns early
# when 'fan' is blacklisted as a whole.
2771 return if blacklisted('fan', 'all');
2775 my $reading = undef;
2776 my $location = undef;
2777 my $max_crit = undef;
2778 my $max_warn = undef;
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
2784 '1.3.6.1.4.1.674.10892.1.700.12.1.2.1' => 'coolingDeviceIndex',
2785 '1.3.6.1.4.1.674.10892.1.700.12.1.5.1' => 'coolingDeviceStatus',
2786 '1.3.6.1.4.1.674.10892.1.700.12.1.6.1' => 'coolingDeviceReading',
2787 '1.3.6.1.4.1.674.10892.1.700.12.1.8.1' => 'coolingDeviceLocationName',
2788 '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
2789 '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
# With $opt{use_get_table} set the whole table is fetched; otherwise
# only the columns listed in %cool_oid are requested.
2792 if ($opt{use_get_table}) {
2793 my $coolingDeviceTable = '1.3.6.1.4.1.674.10892.1.700.12.1';
2794 $result = $snmp_session->get_table(-baseoid => $coolingDeviceTable);
2797 $result = $snmp_session->get_entries(-columns => [keys %cool_oid]);
# A missing result is handled in a separate branch for blades; only for
# non-blades is it printed as an SNMP error, after which the session is
# closed.
2800 if ($blade && !defined $result) {
2803 elsif (!$blade && !defined $result) {
2804 printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
2805 $snmp_session->close;
2809 @output = @{ get_snmp_output($result, \%cool_oid) };
2812 @output = @{ run_omreport("$omopt_chassis fans") };
2816 foreach my $out (@output) {
# SNMP fields. Thresholds that are absent default to 0.
2818 $index = $out->{coolingDeviceIndex};
2819 $status = $snmp_probestatus{$out->{coolingDeviceStatus}};
2820 $reading = $out->{coolingDeviceReading};
2821 $location = $out->{coolingDeviceLocationName};
2822 $max_crit = exists $out->{coolingDeviceUpperCriticalThreshold}
2823 ? $out->{coolingDeviceUpperCriticalThreshold} : 0;
2824 $max_warn = exists $out->{coolingDeviceUpperNonCriticalThreshold}
2825 ? $out->{coolingDeviceUpperNonCriticalThreshold} : 0;
# omreport fields. Thresholds given as '[N/A]' default to 0, and the
# three values are cut down to their leading digits.
2828 $index = $out->{'Index'};
2829 $status = $out->{'Status'};
2830 $reading = $out->{'Reading'};
2831 $location = $out->{'Probe Name'};
2832 $max_crit = $out->{'Maximum Failure Threshold'} ne '[N/A]'
2833 ? $out->{'Maximum Failure Threshold'} : 0;
2834 $max_warn = $out->{'Maximum Warning Threshold'} ne '[N/A]'
2835 ? $out->{'Maximum Warning Threshold'} : 0;
2836 $reading =~ s{\A (\d+).* \z}{$1}xms;
2837 $max_warn =~ s{\A (\d+).* \z}{$1}xms;
2838 $max_crit =~ s{\A (\d+).* \z}{$1}xms;
2841 next FAN if blacklisted('fan', $index);
# Non-OK fans are reported with the Nagios level looked up from the
# status; the lookup table depends on whether SNMP is in use.
2844 if ($status ne 'Ok') {
2845 my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
2846 $index, $location, $status;
2847 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
2848 report('chassis', $msg, $err, $index);
# Status is Ok: informational line carrying the reading.
2851 my $msg = sprintf 'Chassis fan %d [%s]: %s',
2852 $index, $location, $reading;
2853 report('chassis', $msg, $E_OK, $index);
2856 # Collect performance data
# The label is built from the lower-cased location with whitespace
# turned into underscores and the first "proc_" replaced by "cpu#".
2857 if (defined $opt{perfdata}) {
2858 my $pname = lc $location;
2859 $pname =~ s{\s}{_}gxms;
2860 $pname =~ s{proc_}{cpu#}xms;
2862 label => "fan_${index}_${pname}",
2874 #-----------------------------------------
2875 # CHASSIS: Check power supplies
2876 #-----------------------------------------
# Reports the status of every power supply through report(). Data is
# read either from SNMP ($snmp_session and get_snmp_output()) or from
# "omreport <chassis> pwrsupplies". Returns early when 'ps' is
# blacklisted as a whole, and returns 0 when the SNMP request yields no
# result.
2877 sub check_powersupplies {
2878 return if blacklisted('ps', 'all');
2883 my $err_type = undef;
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
2891 '1.3.6.1.4.1.674.10892.1.600.12.1.2.1' => 'powerSupplyIndex',
2892 '1.3.6.1.4.1.674.10892.1.600.12.1.5.1' => 'powerSupplyStatus',
2893 '1.3.6.1.4.1.674.10892.1.600.12.1.7.1' => 'powerSupplyType',
2894 '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
2895 '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
# With $opt{use_get_table} set the whole table is fetched; otherwise
# only the columns listed in %ps_oid are requested.
2898 if ($opt{use_get_table}) {
2899 my $powerDeviceTable = '1.3.6.1.4.1.674.10892.1.600.12.1';
2900 $result = $snmp_session->get_table(-baseoid => $powerDeviceTable);
2903 $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);
2906 # No instrumented PSU is OK (blades, low-end servers)
2907 return 0 if !defined $result;
2909 @output = @{ get_snmp_output($result, \%ps_oid) };
2912 @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
2922 6 => 'Uninterruptible Power Supply',
2932 1 => 'Presence detected',
2933 2 => 'Failure detected',
2934 4 => 'Predictive Failure',
2936 16 => 'AC lost or out-of-range',
2937 32 => 'AC out-of-range but present',
2938 64 => 'Configuration error',
2941 my %ps_config_error_type
2943 1 => 'Vendor mismatch',
2944 2 => 'Revision mismatch',
2945 3 => 'Processor missing',
2949 foreach my $out (@output) {
2951 @states = (); # contains states for the PS
# SNMP fields. The index is shifted down by one; the configuration
# error type is looked up only when the agent supplied one.
2953 $index = $out->{powerSupplyIndex} - 1;
2954 $status = $snmp_status{$out->{powerSupplyStatus}};
2955 $type = get_hashval($out->{powerSupplyType}, \%ps_type);
2956 $err_type = defined $out->{powerSupplyConfigurationErrorType}
2957 ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;
2959 # get the combined state from the StatusReading OID
# The masks are sorted numerically so the state texts are listed in
# ascending bit order; a plain sort would compare them as strings and
# put 16 before 2.
2960 foreach my $mask (sort { $a <=> $b } keys %ps_state) {
2961 if (($out->{powerSupplySensorState} & $mask) != 0) {
2962 push @states, $ps_state{$mask};
2966 # If configuration error, also include the error type
2967 if (defined $err_type) {
2968 push @states, $err_type;
2971 # Finally, construct the state string
2972 $state = join q{, }, @states;
# omreport fields.
2975 $index = $out->{'Index'};
2976 $status = $out->{'Status'};
2977 $type = $out->{'Type'};
2978 $state = $out->{'Online Status'};
2981 next PS if blacklisted('ps', $index);
# Non-OK supplies are reported with the matching Nagios level.
2984 if ($status ne 'Ok') {
2985 my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
2986 $index, $type, $state;
2987 report('chassis', $msg, $status2nagios{$status}, $index);
# Status is Ok: informational line.
2990 my $msg = sprintf 'Power Supply %d [%s]: %s',
2991 $index, $type, $state;
2992 report('chassis', $msg, $E_OK, $index);
2999 #-----------------------------------------
3000 # CHASSIS: Check temperatures
3001 #-----------------------------------------
# Reports the status of every chassis temperature probe through
# report() and, when $opt{perfdata} is defined, prepares a performance
# data label per probe. Probe data is read either from SNMP
# ($snmp_session and get_snmp_output()) or from
# "omreport <chassis> temps". User-supplied thresholds from
# custom_temperature_thresholds() are checked before the thresholds
# reported by the hardware. Returns early when 'temp' is blacklisted as
# a whole.
3002 sub check_temperatures {
3003 return if blacklisted('temp', 'all');
3007 my $reading = undef;
3008 my $location = undef;
3009 my $max_crit = undef;
3010 my $max_warn = undef;
3011 my $min_warn = undef;
3012 my $min_crit = undef;
3014 my $discrete = undef;
3017 # Getting custom temperature thresholds (user option)
3018 my %warn_threshold = %{ custom_temperature_thresholds('w') };
3019 my %crit_threshold = %{ custom_temperature_thresholds('c') };
# SNMP column OIDs and the field names under which get_snmp_output()
# is asked to return them (the names are used as hash keys below).
3024 '1.3.6.1.4.1.674.10892.1.700.20.1.2.1' => 'temperatureProbeIndex',
3025 '1.3.6.1.4.1.674.10892.1.700.20.1.5.1' => 'temperatureProbeStatus',
3026 '1.3.6.1.4.1.674.10892.1.700.20.1.6.1' => 'temperatureProbeReading',
3027 '1.3.6.1.4.1.674.10892.1.700.20.1.7.1' => 'temperatureProbeType',
3028 '1.3.6.1.4.1.674.10892.1.700.20.1.8.1' => 'temperatureProbeLocationName',
3029 '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
3030 '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
3031 '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
3032 '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
3033 '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
3035 # this didn't work well for some reason
3036 #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);
3038 # Getting values using the table
3039 my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
3040 my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);
# A missing result is treated as an SNMP error here: the session error
# is printed and the session is closed.
3042 if (!defined $result) {
3043 printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
3044 $snmp_session->close;
3048 @output = @{ get_snmp_output($result, \%temp_oid) };
3051 @output = @{ run_omreport("$omopt_chassis temps") };
3056 1 => 'Other', # type is other than following values
3057 2 => 'Unknown', # type is unknown
3058 3 => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
3059 16 => 'Discrete', # type is temperature probe with discrete reading
3063 foreach my $out (@output) {
# SNMP fields. The index is shifted down by one. Reading and thresholds
# are divided by 10 (presumably the agent reports tenths of a degree --
# confirm against the MIB); absent values become '[N/A]'.
3065 $index = $out->{temperatureProbeIndex} - 1;
3066 $status = $snmp_probestatus{$out->{temperatureProbeStatus}};
3067 $location = $out->{temperatureProbeLocationName};
3068 $reading = exists $out->{temperatureProbeReading}
3069 ? $out->{temperatureProbeReading} / 10 : '[N/A]';
3070 $max_crit = exists $out->{temperatureProbeUpperCriticalThreshold}
3071 ? $out->{temperatureProbeUpperCriticalThreshold} / 10 : '[N/A]';
3072 $max_warn = exists $out->{temperatureProbeUpperNonCriticalThreshold}
3073 ? $out->{temperatureProbeUpperNonCriticalThreshold} / 10 : '[N/A]';
3074 $min_crit = exists $out->{temperatureProbeLowerCriticalThreshold}
3075 ? $out->{temperatureProbeLowerCriticalThreshold} / 10 : '[N/A]';
3076 $min_warn = exists $out->{temperatureProbeLowerNonCriticalThreshold}
3077 ? $out->{temperatureProbeLowerNonCriticalThreshold} / 10 : '[N/A]';
3078 $type = get_hashval($out->{temperatureProbeType}, \%probe_type);
3079 $discrete = exists $out->{temperatureProbeDiscreteReading}
3080 ? $out->{temperatureProbeDiscreteReading} : '[N/A]';
3081 # workaround for bad temp probes
# A probe counts as bad only when its reading is not a number. The
# optional minus sign keeps sub-zero readings from being caught here.
3082 if ($type eq 'AmbientESM' and $reading !~ m{\A -?\d+(\.\d+)? \z}xms) {
# omreport fields. A trailing ".0 C" is stripped from the reading and
# thresholds. The probe type is derived from the reading: a (possibly
# negative) integer means a numeric probe, anything else is discrete.
3087 $index = $out->{'Index'};
3088 $status = $out->{'Status'};
3089 $reading = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
3090 $location = $out->{'Probe Name'};
3091 $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
3092 $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
3093 $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
3094 $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;
3095 $type = $reading =~ m{\A -?\d+ \z}xms ? 'AmbientESM' : 'Discrete';
3096 $discrete = $reading;
3099 next TEMP if blacklisted('temp', $index);
# Discrete probes have no numeric reading to compare; report the
# discrete value with the Nagios level looked up from the status.
3102 if ($type eq 'Discrete') {
3103 my $msg = sprintf 'Temperature probe %d [%s] is %s',
3104 $index, $location, $discrete;
3105 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3106 report('chassis', $msg, $err, $index);
3109 # First check according to custom thresholds
3110 if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
3111 # Custom critical MAX
3112 my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
3113 $index, $location, $reading, $crit_threshold{$index}{max};
3114 report('chassis', $msg, $E_CRITICAL, $index);
3116 elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
3117 # Custom warning MAX
3118 my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
3119 $index, $location, $reading, $warn_threshold{$index}{max};
3120 report('chassis', $msg, $E_WARNING, $index);
3122 elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
3123 # Custom critical MIN
3124 my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
3125 $index, $location, $reading, $crit_threshold{$index}{min};
3126 report('chassis', $msg, $E_CRITICAL, $index);
3128 elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
3129 # Custom warning MIN
3130 my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
3131 $index, $location, $reading, $warn_threshold{$index}{min};
3132 report('chassis', $msg, $E_WARNING, $index);
# Hardware thresholds are consulted only for probes whose status is
# not Ok, and only when the threshold in question is known.
3134 elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
3135 my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
3136 $index, $location, $reading;
3137 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3138 report('chassis', $msg, $err, $index);
3140 elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
3141 my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
3142 $index, $location, $reading;
3143 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3144 report('chassis', $msg, $err, $index);
3146 elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
3147 my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
3148 $index, $location, $reading;
3149 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3150 report('chassis', $msg, $err, $index);
3152 elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
3153 my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
3154 $index, $location, $reading;
3155 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3156 report('chassis', $msg, $err, $index);
# Remaining case: report the reading with the hardware thresholds. The
# minimum thresholds are left out when both are unknown.
3160 my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
3161 $index, $location, $reading;
3162 if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
3163 $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
3166 $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
3167 $min_warn, $min_crit, $max_warn, $max_crit;
3169 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3170 report('chassis', $msg, $err, $index);
3173 # Collect performance data
# The label is built from the lower-cased location with whitespace
# turned into underscores, a trailing "_temp" removed and the first
# "proc_" replaced by "cpu#".
3174 if (defined $opt{perfdata}) {
3175 my $pname = lc $location;
3176 $pname =~ s{\s}{_}gxms;
3177 $pname =~ s{_temp\z}{}xms;
3178 $pname =~ s{proc_}{cpu#}xms;
3180 label => "temp_${index}_${pname}",
3193 #-----------------------------------------
3194 # CHASSIS: Check processors
3195 #-----------------------------------------
3196 sub check_processors {
3197 return if blacklisted('cpu', 'all');
3210 # NOTE: For some reason, older models don't have the
3211 # "Processor Device Status" OIDs. We check both the newer
3212 # (preferred) OIDs and the old ones.
3216 '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
3217 '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
3218 '1.3.6.1.4.1.674.10892.1.1100.30.1.8.1' => 'processorDeviceManufacturerName',
3219 '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
3220 '1.3.6.1.4.1.674.10892.1.1100.30.1.10.1' => 'processorDeviceFamily',
3221 '1.3.6.1.4.1.674.10892.1.1100.30.1.12.1' => 'processorDeviceCurrentSpeed',
3222 '1.3.6.1.4.1.674.10892.1.1100.30.1.23.1' => 'processorDeviceBrandName',
3223 '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
3224 '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
3225 '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
3228 if ($opt{use_get_table}) {
3229 my $processorDeviceTable = '1.3.6.1.4.1.674.10892.1.1100.30.1';
3230 my $processorDeviceStatusTable = '1.3.6.1.4.1.674.10892.1.1100.32.1';
3232 $result = $snmp_session->get_table(-baseoid => $processorDeviceTable);
3233 my $ext = $snmp_session->get_table(-baseoid => $processorDeviceStatusTable);
3235 defined $ext && map { $$result{$_} = $$ext{$_} } keys %{ $ext };
3238 $result = $snmp_session->get_entries(-columns => [keys %cpu_oid]);
3241 if (!defined $result) {
3242 printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
3243 $snmp_session->close;
3247 @output = @{ get_snmp_output($result, \%cpu_oid) };
3250 @output = @{ run_omreport("$omopt_chassis processors") };
3255 1 => 'Other', # other than following values
3256 2 => 'Unknown', # unknown
3257 3 => 'Enabled', # enabled
3258 4 => 'User Disabled', # disabled by user via BIOS setup
3259 5 => 'BIOS Disabled', # disabled by BIOS (POST error)
3265 1 => 'Internal Error', # Internal Error
3266 2 => 'Thermal Trip', # Thermal Trip
3267 32 => 'Configuration Error', # Configuration Error
3268 128 => 'Present', # Processor Present
3269 256 => 'Disabled', # Processor Disabled
3270 512 => 'Terminator Present', # Terminator Present
3271 1024 => 'Throttled', # Processor Throttled
3274 # Mapping between family numbers from SNMP and actual CPU family
3277 1 => 'Other', 2 => 'Unknown',
3278 3 => '8086', 4 => '80286',
3279 5 => '386', 6 => '486',
3280 7 => '8087', 8 => '80287',
3281 9 => '80387', 10 => '80487',
3282 11 => 'Pentium', 12 => 'Pentium Pro',
3283 13 => 'Pentium II', 14 => 'Pentium with MMX',
3284 15 => 'Celeron', 16 => 'Pentium II Xeon',
3285 17 => 'Pentium III', 18 => 'Pentium III Xeon',
3286 19 => 'Pentium III', 20 => 'Itanium',
3287 21 => 'Xeon', 22 => 'Pentium 4',
3288 23 => 'Xeon MP', 24 => 'Itanium 2',
3289 25 => 'K5', 26 => 'K6',
3290 27 => 'K6-2', 28 => 'K6-3',
3291 29 => 'Athlon', 30 => 'AMD2900',
3292 31 => 'K6-2+', 32 => 'Power PC',
3293 33 => 'Power PC 601', 34 => 'Power PC 603',
3294 35 => 'Power PC 603+', 36 => 'Power PC 604',
3295 37 => 'Power PC 620', 38 => 'Power PC x704',
3296 39 => 'Power PC 750', 40 => 'Core Duo',
3297 41 => 'Core Duo mobile', 42 => 'Core Solo mobile',
3298 43 => 'Intel Atom', 44 => undef,
3299 45 => undef, 46 => undef,
3300 47 => undef, 48 => 'Alpha',
3301 49 => 'Alpha 21064', 50 => 'Alpha 21066',
3302 51 => 'Alpha 21164', 52 => 'Alpha 21164PC',
3303 53 => 'Alpha 21164a', 54 => 'Alpha 21264',
3304 55 => 'Alpha 21364', 56 => 'Turion II Ultra Dual-Core Mobile M',
3305 57 => 'Turion II Dual-Core Mobile M', 58 => 'Athlon II Dual-Core Mobile M ',
3306 59 => 'Opteron 6100', 60 => 'Opteron 4100',
3307 61 => undef, 62 => undef,
3308 63 => undef, 64 => 'MIPS',
3309 65 => 'MIPS R4000', 66 => 'MIPS R4200',
3310 67 => 'MIPS R4400', 68 => 'MIPS R4600',
3311 69 => 'MIPS R10000', 70 => undef,
3312 71 => undef, 72 => undef,
3313 73 => undef, 74 => undef,
3314 75 => undef, 76 => undef,
3315 77 => undef, 78 => undef,
3316 79 => undef, 80 => 'SPARC',
3317 81 => 'SuperSPARC', 82 => 'microSPARC II',
3318 83 => 'microSPARC IIep', 84 => 'UltraSPARC',
3319 85 => 'UltraSPARC II', 86 => 'UltraSPARC IIi',
3320 87 => 'UltraSPARC III', 88 => 'UltraSPARC IIIi',
3321 89 => undef, 90 => undef,
3322 91 => undef, 92 => undef,
3323 93 => undef, 94 => undef,
3324 95 => undef, 96 => '68040',
3325 97 => '68xxx', 98 => '68000',
3326 99 => '68010', 100 => '68020',
3327 101 => '68030', 102 => undef,
3328 103 => undef, 104 => undef,
3329 105 => undef, 106 => undef,
3330 107 => undef, 108 => undef,
3331 109 => undef, 110 => undef,
3332 111 => undef, 112 => 'Hobbit',
3333 113 => undef, 114 => undef,
3334 115 => undef, 116 => undef,
3335 117 => undef, 118 => undef,
3336 119 => undef, 120 => 'Crusoe TM5000',
3337 121 => 'Crusoe TM3000', 122 => 'Efficeon TM8000',
3338 123 => undef, 124 => undef,
3339 125 => undef, 126 => undef,
3340 127 => undef, 128 => 'Weitek',
3341 129 => undef, 130 => 'Celeron M',
3342 131 => 'Athlon 64', 132 => 'Opteron',
3343 133 => 'Sempron', 134 => 'Turion 64 Mobile',
3344 135 => 'Dual-Core Opteron', 136 => 'Athlon 64 X2 DC',
3345 137 => 'Turion 64 X2 M', 138 => 'Quad-Core Opteron',
3346 139 => '3rd gen Opteron', 140 => 'AMD Phenom FX Quad-Core',
3347 141 => 'AMD Phenom X4 Quad-Core', 142 => 'AMD Phenom X2 Dual-Core',
3348 143 => 'AMD Athlon X2 Dual-Core', 144 => 'PA-RISC',
3349 145 => 'PA-RISC 8500', 146 => 'PA-RISC 8000',
3350 147 => 'PA-RISC 7300LC', 148 => 'PA-RISC 7200',
3351 149 => 'PA-RISC 7100LC', 150 => 'PA-RISC 7100',
3352 151 => undef, 152 => undef,
3353 153 => undef, 154 => undef,
3354 155 => undef, 156 => undef,
3355 157 => undef, 158 => undef,
3356 159 => undef, 160 => 'V30',
3357 161 => 'Quad-Core Xeon 3200', 162 => 'Dual-Core Xeon 3000',
3358 163 => 'Quad-Core Xeon 5300', 164 => 'Dual-Core Xeon 5100',
3359 165 => 'Dual-Core Xeon 5000', 166 => 'Dual-Core Xeon LV',
3360 167 => 'Dual-Core Xeon ULV', 168 => 'Dual-Core Xeon 7100',
3361 169 => 'Quad-Core Xeon 5400', 170 => 'Quad-Core Xeon',
3362 171 => 'Dual-Core Xeon 5200', 172 => 'Dual-Core Xeon 7200',
3363 173 => 'Quad-Core Xeon 7300', 174 => 'Quad-Core Xeon 7400',
3364 175 => 'Multi-Core Xeon 7400', 176 => 'M1',
3365 177 => 'M2', 178 => undef,
3366 179 => 'Pentium 4 HT', 180 => 'AS400',
3367 181 => undef, 182 => 'Athlon XP',
3368 183 => 'Athlon MP', 184 => 'Duron',
3369 185 => 'Pentium M', 186 => 'Celeron D',
3370 187 => 'Pentium D', 188 => 'Pentium Extreme',
3371 189 => 'Core Solo', 190 => 'Core2',
3372 191 => 'Core2 Duo', 192 => 'Core2 Solo',
3373 193 => 'Core2 Extreme', 194 => 'Core2 Quad',
3374 195 => 'Core2 Extreme mobile', 196 => 'Core2 Duo mobile',
3375 197 => 'Core2 Solo mobile', 198 => 'Core i7',
3376 199 => 'Dual-Core Celeron', 200 => 'IBM390',
3377 201 => 'G4', 202 => 'G5',
3378 203 => 'ESA/390 G6', 204 => 'z/Architectur',
3379 205 => 'Core i5', 206 => 'Core i3',
3380 207 => undef, 208 => undef,
3381 209 => undef, 210 => 'C7-M',
3382 211 => 'C7-D', 212 => 'C7',
3383 213 => 'Eden', 214 => 'Multi-Core Xeon',
3384 215 => 'Dual-Core Xeon 3xxx', 216 => 'Quad-Core Xeon 3xxx',
3385 217 => 'VIA Nano', 218 => 'Dual-Core Xeon 5xxx',
3386 219 => 'Quad-Core Xeon 5xxx', 220 => undef,
3387 221 => 'Dual-Core Xeon 7xxx', 222 => 'Quad-Core Xeon 7xxx',
3388 223 => 'Multi-Core Xeon 7xxx', 224 => 'Multi-Core Xeon 3400',
3389 225 => undef, 226 => undef,
3390 227 => undef, 228 => undef,
3391 229 => undef, 230 => 'Embedded AMD Opteron Quad-Core',
3392 231 => 'AMD Phenom Triple-Core', 232 => 'AMD Turion Ultra Dual-Core Mobile',
3393 233 => 'AMD Turion Dual-Core Mobile', 234 => 'AMD Athlon Dual-Core',
3394 235 => 'AMD Sempron SI', 236 => 'AMD Phenom II',
3395 237 => 'AMD Athlon II', 238 => 'Six-Core AMD Opteron',
3396 239 => 'AMD Sempron M', 240 => undef,
3397 241 => undef, 242 => undef,
3398 243 => undef, 244 => undef,
3399 245 => undef, 246 => undef,
3400 247 => undef, 248 => undef,
3401 249 => undef, 250 => 'i860',
3406 foreach my $out (@output) {
3408 $index = exists $out->{processorDeviceStatusIndex}
3409 ? $out->{processorDeviceStatusIndex} - 1
3410 : $out->{processorDeviceIndex} - 1;
3411 $status = exists $out->{processorDeviceStatusStatus}
3412 ? $snmp_status{$out->{processorDeviceStatusStatus}}
3413 : $snmp_status{$out->{processorDeviceStatus}};
3414 if (exists $out->{processorDeviceStatusReading}) {
3415 my @states = (); # contains states for the CPU
3417 # get the combined state from the StatusReading OID
3418 foreach my $mask (sort keys %cpu_reading) {
3419 if (($out->{processorDeviceStatusReading} & $mask) != 0) {
3420 push @states, $cpu_reading{$mask};
3424 # Finally, create the state string
3425 $state = join q{, }, @states;
3428 $state = get_hashval($out->{processorDeviceStatusState}, \%cpu_state);
3430 $man = $out->{processorDeviceManufacturerName};
3431 $family = (exists $out->{processorDeviceFamily}
3432 and exists $cpu_family{$out->{processorDeviceFamily}})
3433 ? $cpu_family{$out->{processorDeviceFamily}} : undef;
3434 $speed = $out->{processorDeviceCurrentSpeed};
3435 $brand = $out->{processorDeviceBrandName};
3438 $index = $out->{'Index'};
3439 $status = $out->{'Status'};
3440 $state = $out->{'State'};
3441 $brand = exists $out->{'Processor Brand'} ? $out->{'Processor Brand'} : undef;
3442 $family = exists $out->{'Processor Family'} ? $out->{'Processor Family'} : undef;
3443 $man = exists $out->{'Processor Manufacturer'} ? $out->{'Processor Manufacturer'} : undef;
3444 $speed = exists $out->{'Current Speed'} ? $out->{'Current Speed'} : undef;
3447 next CPU if blacklisted('cpu', $index);
3449 # Ignore unoccupied CPU slots (omreport)
3450 next CPU if (defined $out->{'Processor Manufacturer'}
3451 and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
3452 or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');
3454 # Ignore unoccupied CPU slots (snmp)
3455 if ($snmp and exists $out->{processorDeviceStatusReading}
3456 and $out->{processorDeviceStatusReading} == 0) {
3462 if (defined $brand) {
3463 $brand =~ s{\s\s+}{ }gxms;
3464 $brand =~ s{\((R|tm)\)}{}gxms;
3465 $brand =~ s{\s(CPU|Processor)}{}xms;
3466 $brand =~ s{\s\@}{}xms;
3468 elsif (defined $family and defined $man and defined $speed) {
3469 $speed =~ s{\A (\d+) .*}{$1}xms;
3470 $brand = sprintf '%s %s %.2fGHz', $man, $family, $speed / 1000;
3477 if ($status ne 'Ok') {
3478 my $msg = sprintf 'Processor %d [%s] needs attention: %s',
3479 $index, $brand, $state;
3480 report('chassis', $msg, $status2nagios{$status}, $index);
3484 my $msg = sprintf 'Processor %d [%s] is %s',
3485 $index, $brand, $state;
3486 report('chassis', $msg, $E_OK, $index);
3493 #-----------------------------------------
3494 # CHASSIS: Check voltage probes
3495 #-----------------------------------------
# Reports one 'chassis' message per voltage probe. Probe data is read
# from the SNMP voltage probe table, or from "omreport <chassis> volts".
# The whole check is skipped when 'volt' is blacklisted with 'all'.
3497 return if blacklisted('volt', 'all');
3501 my $reading = undef;
3502 my $location = undef;
# Columns of the voltage probe table and the symbolic names handed to
# get_snmp_output()
3508 '1.3.6.1.4.1.674.10892.1.600.20.1.2.1' => 'voltageProbeIndex',
3509 '1.3.6.1.4.1.674.10892.1.600.20.1.5.1' => 'voltageProbeStatus',
3510 '1.3.6.1.4.1.674.10892.1.600.20.1.6.1' => 'voltageProbeReading',
3511 '1.3.6.1.4.1.674.10892.1.600.20.1.8.1' => 'voltageProbeLocationName',
3512 '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
# The complete table is fetched in one request
3515 my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
3516 my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);
# A failed fetch is printed directly (not queued through report()) and
# the SNMP session is closed
3518 if (!defined $result) {
3519 printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
3520 $snmp_session->close;
3524 @output = @{ get_snmp_output($result, \%volt_oid) };
# Non-SNMP source: omreport
3527 @output = @{ run_omreport("$omopt_chassis volts") };
# Lookup table for probes that have a discrete reading instead of a
# numeric one
3530 my %volt_discrete_reading
# Normalise every probe to index/status/reading/location
3537 foreach my $out (@output) {
# SNMP values: the table index is shifted down by one, presumably to
# match the numbering omreport uses -- TODO confirm
3539 $index = $out->{voltageProbeIndex} - 1;
3540 $status = $snmp_probestatus{$out->{voltageProbeStatus}};
# A numeric reading is divided by 1000 and printed with the unit "V";
# without one, the discrete reading is looked up instead
3541 $reading = exists $out->{voltageProbeReading}
3542 ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
3543 : get_hashval($out->{voltageProbeDiscreteReading}, \%volt_discrete_reading);
3544 $location = $out->{voltageProbeLocationName};
# omreport values are used as they are
3547 $index = $out->{'Index'};
3548 $status = $out->{'Status'};
3549 $reading = $out->{'Reading'};
3550 $location = $out->{'Probe Name'};
# Individually blacklisted probes are skipped
3553 next VOLT if blacklisted('volt', $index);
3556 my $msg = sprintf 'Voltage sensor %d [%s] is %s',
3557 $index, $location, $reading;
# SNMP status strings come from %snmp_probestatus and therefore need the
# probe-specific Nagios mapping; omreport strings use the generic one
3558 my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
3559 report('chassis', $msg, $err, $index);
3565 #-----------------------------------------
3566 # CHASSIS: Check batteries
3567 #-----------------------------------------
# Reports one 'chassis' message per battery probe. Probe data is read
# from the SNMP battery table, or from "omreport <chassis> batteries".
3568 sub check_batteries {
# Skip everything when battery probes are blacklisted as a group
3569 return if blacklisted('bp', 'all');
3573 my $reading = undef;
3574 my $location = undef;
# Columns of the battery table and the symbolic names handed to
# get_snmp_output()
3580 '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
3581 '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
3582 '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
3583 '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
# Option use_get_table selects a walk of the whole table; otherwise only
# the columns listed above are requested
3586 if ($opt{use_get_table}) {
3587 my $batteryTable = '1.3.6.1.4.1.674.10892.1.600.50.1';
3588 $result = $snmp_session->get_table(-baseoid => $batteryTable);
3591 $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);
3594 # No batteries is OK
3595 return 0 if !defined $result;
3597 @output = @{ get_snmp_output($result, \%bat_oid) };
# Non-SNMP source: omreport
3600 @output = @{ run_omreport("$omopt_chassis batteries") };
# Text for the SNMP batteryReading codes
3605 1 => 'Predictive Failure',
3607 4 => 'Presence Detected',
# Normalise every probe to index/status/reading/location
3611 foreach my $out (@output) {
# SNMP values: the table index is shifted down by one, presumably to
# match the numbering omreport uses -- TODO confirm
3613 $index = $out->{batteryIndex} - 1;
3614 $status = $snmp_status{$out->{batteryStatus}};
3615 $reading = get_hashval($out->{batteryReading}, \%bat_reading);
3616 $location = $out->{batteryLocationName};
# omreport values are used as they are
3619 $index = $out->{'Index'};
3620 $status = $out->{'Status'};
3621 $reading = $out->{'Reading'};
3622 $location = $out->{'Probe Name'};
# Individually blacklisted probes are skipped
3625 next BATTERY if blacklisted('bp', $index);
3628 my $msg = sprintf 'Battery probe %d [%s] is %s',
3629 $index, $location, $reading;
3630 report('chassis', $msg, $status2nagios{$status}, $index);
3636 #-----------------------------------------
3637 # CHASSIS: Check amperage probes (power monitoring)
3638 #-----------------------------------------
# Reports one 'chassis' message per amperage probe and, when the
# perfdata option is set, collects performance data for probes with a
# numeric reading. Probe data is read from the SNMP amperage probe
# table, or from "omreport <chassis> pwrmonitoring".
3639 sub check_pwrmonitoring {
# Skip everything when amperage probes are blacklisted as a group
3640 return if blacklisted('amp', 'all');
3644 my $reading = undef;
3645 my $location = undef;
3646 my $max_crit = undef;
3647 my $max_warn = undef;
# Columns of the amperage probe table and the symbolic names handed to
# get_snmp_output()
3655 '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex',
3656 '1.3.6.1.4.1.674.10892.1.600.30.1.5.1' => 'amperageProbeStatus',
3657 '1.3.6.1.4.1.674.10892.1.600.30.1.6.1' => 'amperageProbeReading',
3658 '1.3.6.1.4.1.674.10892.1.600.30.1.7.1' => 'amperageProbeType',
3659 '1.3.6.1.4.1.674.10892.1.600.30.1.8.1' => 'amperageProbeLocationName',
3660 '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold',
3661 '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold',
3662 '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading',
# Option use_get_table selects a walk of the whole table; otherwise only
# the columns listed above are requested
3665 if ($opt{use_get_table}) {
3666 my $amperageProbeTable = '1.3.6.1.4.1.674.10892.1.600.30.1';
3667 $result = $snmp_session->get_table(-baseoid => $amperageProbeTable);
3670 $result = $snmp_session->get_entries(-columns => [keys %amp_oid]);
3673 # No pwrmonitoring is OK
3674 return 0 if !defined $result;
3676 @output = @{ get_snmp_output($result, \%amp_oid) };
# Non-SNMP source: omreport
3679 @output = @{ run_omreport("$omopt_chassis pwrmonitoring") };
3682 my %amp_type # Amperage probe types
3684 1 => 'amperageProbeTypeIsOther', # other than following values
3685 2 => 'amperageProbeTypeIsUnknown', # unknown
3686 3 => 'amperageProbeTypeIs1Point5Volt', # 1.5 amperage probe
3687 4 => 'amperageProbeTypeIs3Point3volt', # 3.3 amperage probe
3688 5 => 'amperageProbeTypeIs5Volt', # 5 amperage probe
3689 6 => 'amperageProbeTypeIsMinus5Volt', # -5 amperage probe
3690 7 => 'amperageProbeTypeIs12Volt', # 12 amperage probe
3691 8 => 'amperageProbeTypeIsMinus12Volt', # -12 amperage probe
3692 9 => 'amperageProbeTypeIsIO', # I/O probe
3693 10 => 'amperageProbeTypeIsCore', # Core probe
3694 11 => 'amperageProbeTypeIsFLEA', # FLEA (standby) probe
3695 12 => 'amperageProbeTypeIsBattery', # Battery probe
3696 13 => 'amperageProbeTypeIsTerminator', # SCSI Termination probe
3697 14 => 'amperageProbeTypeIs2Point5Volt', # 2.5 amperage probe
3698 15 => 'amperageProbeTypeIsGTL', # GTL (ground termination logic) probe
3699 16 => 'amperageProbeTypeIsDiscrete', # amperage probe with discrete reading
3700 23 => 'amperageProbeTypeIsPowerSupplyAmps', # Power Supply probe with reading in Amps
3701 24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts
3702 25 => 'amperageProbeTypeIsSystemAmps', # System probe with reading in Amps
3703 26 => 'amperageProbeTypeIsSystemWatts', # System probe with reading in Watts
# Unit per probe type; types that are not listed fall back to 'mA' in
# the SNMP branch of the loop below
3714 'amperageProbeTypeIsPowerSupplyAmps' => 'hA', # tenths of Amps
3715 'amperageProbeTypeIsSystemAmps' => 'hA', # tenths of Amps
3716 'amperageProbeTypeIsPowerSupplyWatts' => 'W', # Watts
3717 'amperageProbeTypeIsSystemWatts' => 'W', # Watts
3718 'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit
# Normalise every probe, report it, and optionally collect perfdata
3722 foreach my $out (@output) {
# SNMP values: the table index is shifted down by one, presumably to
# match the numbering omreport uses -- TODO confirm
3724 $index = $out->{amperageProbeIndex} - 1;
3725 $status = $snmp_status{$out->{amperageProbeStatus}};
3726 $type = get_hashval($out->{amperageProbeType}, \%amp_type);
# Discrete probes take their reading from the discrete-reading lookup,
# all others use the numeric reading
3727 $reading = $type eq 'amperageProbeTypeIsDiscrete'
3728 ? get_hashval($out->{amperageProbeDiscreteReading}, \%amp_discrete)
3729 : $out->{amperageProbeReading};
3730 $location = $out->{amperageProbeLocationName};
# Missing thresholds default to 0
3731 $max_crit = exists $out->{amperageProbeUpperCriticalThreshold}
3732 ? $out->{amperageProbeUpperCriticalThreshold} : 0;
3733 $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold}
3734 ? $out->{amperageProbeUpperNonCriticalThreshold} : 0;
3735 $unit = exists $amp_unit{$amp_type{$out->{amperageProbeType}}}
3736 ? $amp_unit{$amp_type{$out->{amperageProbeType}}} : 'mA';
3738 # workaround for broken probes
# (a probe without any reading is handled as a discrete probe)
3739 if (!defined $reading) {
3740 $type = 'amperageProbeTypeIsDiscrete';
3745 # calculate proper values and set unit for ampere probes
3746 if ($unit eq 'hA' and $type ne 'amperageProbeTypeIsDiscrete') {
# omreport values; rows without a purely numeric index are skipped
3754 $index = $out->{'Index'};
3755 next AMP if (!defined $index || $index !~ m/^\d+$/x);
3756 $status = $out->{'Status'};
3757 $reading = $out->{'Reading'};
3758 $location = $out->{'Probe Name'};
# The placeholder '[N/A]' means "no threshold" and is replaced by 0
3759 $max_crit = $out->{'Failure Threshold'} ne '[N/A]'
3760 ? $out->{'Failure Threshold'} : 0;
3761 $max_warn = $out->{'Warning Threshold'} ne '[N/A]'
3762 ? $out->{'Warning Threshold'} : 0;
# Strip the trailing unit word so that only the leading numeric part of
# the reading and of both thresholds remains
3763 $reading =~ s{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}{$1}xms;
3765 $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
3766 $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
# Skip blacklisted probes and anything without a purely numeric index
3769 next AMP if blacklisted('amp', $index);
3770 next AMP if $index !~ m{\A \d+ \z}xms;
# Discrete probes are reported as "is <state>", numeric probes as
# "reads <value> <unit>"
3773 if (defined $type and $type eq 'amperageProbeTypeIsDiscrete') {
3774 my $msg = sprintf 'Amperage probe %d [%s] is %s',
3775 $index, $location, $reading;
3776 report('chassis', $msg, $status2nagios{$status}, $index);
3779 my $msg = sprintf 'Amperage probe %d [%s] reads %s %s',
3780 $index, $location, $reading, $unit;
3781 report('chassis', $msg, $status2nagios{$status}, $index);
3784 # Collect performance data
3785 if (defined $opt{perfdata}) {
3786 next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
3787 #next AMP if $type eq 'amperageProbeTypeIsDiscrete';
# Label: "pwr_mon", probe index and lower-cased location joined with
# underscores, with any whitespace turned into underscores as well
3788 my $label = join q{_}, 'pwr_mon', $index, lc $location;
3789 $label =~ s{\s}{_}gxms;
3792 mini => "p${index}" . lc $unit,
3800 # Collect EXTRA performance data not found at first run. This is a
# This extra pass only runs with omreport (not SNMP) and only when
# perfdata was requested
3802 if (defined $opt{perfdata} && !$snmp) {
# Inspect the labels collected so far; %used (tested further down)
# presumably records their probe indexes -- the statement that fills it
# is not part of this listing
3808 foreach (@perfdata) {
3809 if ($_->{label} =~ m/\A pwr_mon_(\d+)/xms) {
# Re-read the report in semicolon-separated (ssv) format
3815 foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
# The header line 'Location;Reading' marks the section of interest
3817 if ($line eq 'Location;Reading') {
# Data rows look like "<name>;<decimal number> A" or "... W"
3825 if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s [AW] \z/xms) {
3828 $aname =~ s{\s}{_}gxms;
3830 # don't use an existing index
3831 while (exists $used{$index}) { ++$index; }
3834 label => "pwr_mon_${index}_${aname}",
3835 mini => "p${index}a",
3849 #-----------------------------------------
3850 # CHASSIS: Check intrusion
3851 #-----------------------------------------
# Reports one 'chassis' message per chassis intrusion sensor. Sensor
# data is read from the SNMP intrusion table, or from
# "omreport <chassis> intrusion".
3852 sub check_intrusion {
# Skip everything when intrusion sensors are blacklisted as a group
3853 return if blacklisted('intr', 'all');
3857 my $reading = undef;
# Columns of the intrusion table and the symbolic names handed to
# get_snmp_output()
3863 '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
3864 '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
3865 '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
# Option use_get_table selects a walk of the whole table; otherwise only
# the columns listed above are requested
3868 if ($opt{use_get_table}) {
3869 my $intrusionTable = '1.3.6.1.4.1.674.10892.1.300.70.1';
3870 $result = $snmp_session->get_table(-baseoid => $intrusionTable);
3873 $result = $snmp_session->get_entries(-columns => [keys %int_oid]);
3876 # No intrusion is OK
3877 return 0 if !defined $result;
3879 @output = @{ get_snmp_output($result, \%int_oid) };
# Non-SNMP source: omreport
3882 @output = @{ run_omreport("$omopt_chassis intrusion") };
# Text for the SNMP intrusionReading codes
3887 1 => 'Not Breached', # chassis not breached and no uncleared breaches
3888 2 => 'Breached', # chassis currently breached
3889 3 => 'Breached Prior', # chassis breached prior to boot and has not been cleared
3890 4 => 'Breach Sensor Failure', # intrusion sensor has failed
# Normalise every sensor to index/status/reading
3894 foreach my $out (@output) {
# SNMP values: the table index is shifted down by one, presumably to
# match the numbering omreport uses -- TODO confirm
3896 $index = $out->{intrusionIndex} - 1;
3897 $status = $snmp_status{$out->{intrusionStatus}};
3898 $reading = get_hashval($out->{intrusionReading}, \%int_reading);
# omreport values are used as they are
3901 $index = $out->{'Index'};
3902 $status = $out->{'Status'};
3903 $reading = $out->{'State'};
# Individually blacklisted sensors are skipped
3906 next INTRUSION if blacklisted('intr', $index);
# Any status other than 'Ok' is reported with the fixed level WARNING;
# the status string itself does not influence the level
3909 if ($status ne 'Ok') {
3910 my $msg = sprintf 'Chassis intrusion %d detected: %s',
3912 report('chassis', $msg, $E_WARNING, $index);
3916 my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)',
3917 $index, $status, $reading;
3918 report('chassis', $msg, $E_OK, $index);
#-----------------------------------------
# CHASSIS: Check alert log
#-----------------------------------------
# Tallies the entries of the alert log by severity in $count{alert} and
# reports a single summary message in the 'other' category. The level
# is CRITICAL when at least one critical entry was counted, WARNING
# when at least one non-critical entry was counted, and OK otherwise.
# Does nothing in SNMP mode.
sub check_alertlog {
    # Not supported with SNMP
    if ($snmp) {
        return;
    }

    # One counter per severity string found in the log
    for my $entry (@{ run_omreport("$omopt_system alertlog") }) {
        $count{alert}{ $entry->{Severity} }++;
    }

    # Critical entries win over non-critical ones
    my $level
      = $count{alert}{'Critical'}     > 0 ? $E_CRITICAL
      : $count{alert}{'Non-Critical'} > 0 ? $E_WARNING
      :                                     $E_OK;

    my $summary = sprintf 'Alert log content: %d critical, %d non-critical, %d ok',
      $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'};
    report('other', $summary, $level);

    return;
}
3948 #-----------------------------------------
3949 # CHASSIS: Check ESM log overall health
3950 #-----------------------------------------
# Reports a single message in the 'other' category that describes how
# full the ESM log is, based on the log's overall health status.
3951 sub check_esmlog_health {
# SNMP: the health is a single value, fetched with one GET request
3955 my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
3956 my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
# A failed request is reported as UNKNOWN
3957 if (!defined $result) {
3958 my $msg = sprintf 'SNMP ERROR [esmhealth]: %s',
3959 $snmp_session->error;
3960 report('other', $msg, $E_UNKNOWN);
3962 $health = $snmp_status{$result->{$systemStateEventLogStatus}};
# omreport: look for the "Health;<value>" line of the ssv output
3965 foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
3966 if (m/\A Health;(.+) \z/xms) {
3974 # If the overall health of the ESM log is other than "Ok", the
3975 # fill grade of the log is more than 80% and the log should be
# 'Ok' is reported as OK, 'Critical' as a completely full log, and any
# other value as more than 80% full; the last two take their level from
# %status2nagios
3977 if ($health eq 'Ok') {
3978 my $msg = sprintf 'ESM log health is Ok (less than 80%% full)';
3979 report('other', $msg, $E_OK);
3981 elsif ($health eq 'Critical') {
3982 my $msg = sprintf 'ESM log is 100%% full';
3983 report('other', $msg, $status2nagios{$health});
3986 my $msg = sprintf 'ESM log is more than 80%% full';
3987 report('other', $msg, $status2nagios{$health});
3993 #-----------------------------------------
3994 # CHASSIS: Check ESM log
3995 #-----------------------------------------
# Tallies the ESM log entries by severity in $count{esm} and reports a
# single summary message in the 'other' category.
# SNMP: only the severity column of the event log table is requested
4002 '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
4004 my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);
# Without a result there is nothing to count
4007 return if !defined $result;
4009 @output = @{ get_snmp_output($result, \%esm_oid) };
# The numeric severity is translated through %snmp_status before it is
# used as the counter key
4010 foreach my $out (@output) {
4011 ++$count{esm}{$snmp_status{$out->{eventLogSeverityStatus}}};
# omreport: the 'Severity' field is used as the counter key directly
4015 @output = @{ run_omreport("$omopt_system esmlog") };
4016 foreach my $out (@output) {
4017 ++$count{esm}{$out->{Severity}};
4021 # Create error messages and set exit value if appropriate
# (critical entries take precedence over non-critical ones)
4023 if ($count{esm}{'Critical'} > 0) { $err = $E_CRITICAL; }
4024 elsif ($count{esm}{'Non-Critical'} > 0) { $err = $E_WARNING; }
4026 my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok',
4027 $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'};
4028 report('other', $msg, $err);
4034 # Handy function for checking all storage components
# Runs the individual storage checks one after another, in the order
# listed. No arguments are passed and no return value is used.
4037 check_controllers();
4038 check_physical_disks();
4039 check_virtual_disks();
4040 check_cache_battery();
# Enclosure components
4043 check_enclosure_fans();
4044 check_enclosure_pwr();
4045 check_enclosure_temp();
4046 check_enclosure_emms();
4052 #---------------------------------------------------------------------
4054 #---------------------------------------------------------------------
4057 # Fetch output from 'omreport chassis info', put in sysinfo hash
# Sets $sysinfo{model}, $sysinfo{serial} and $sysinfo{rev} from the
# semicolon-separated (ssv) output of the command.
4059 sub get_omreport_chassis_info {
# Read the command output through a pipe
4060 if (open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv") {
4061 my @lines = <$INFO>;
# Only lines that start with one of the wanted field names are used
4064 next if !m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag|System\sRevision)/xms;
# Lines have the form "<key>;<value>"
4065 my ($key, $val) = split /;/xms;
4066 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
4067 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
# Two spellings are accepted for the model and service tag field names
4068 if ($key eq 'Chassis Model' or $key eq 'Model') {
4069 $sysinfo{model} = $val;
4071 if ($key eq 'Chassis Service Tag' or $key eq 'Service Tag') {
4072 $sysinfo{serial} = $val;
# The revision is stored with a leading space
4074 if ($key eq 'System Revision') {
4075 $sysinfo{rev} = q{ } . $val;
4083 # Fetch output from 'omreport chassis bios', put in sysinfo hash
# Sets $sysinfo{bios} and $sysinfo{biosdate} from the semicolon-separated
# (ssv) output of the command.
4085 sub get_omreport_chassis_bios {
# Read the command output through a pipe
4086 if (open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv") {
4087 my @lines = <$BIOS>;
# Lines have the form "<key>;<value>"
4091 my ($key, $val) = split /;/xms;
4092 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
4093 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
4094 $sysinfo{bios} = $val if $key eq 'Version';
4095 $sysinfo{biosdate} = $val if $key eq 'Release Date';
4102 # Fetch output from 'omreport system operatingsystem', put in sysinfo hash
# Sets $sysinfo{osname} and $sysinfo{osver} from the semicolon-separated
# (ssv) output of the command.
4104 sub get_omreport_system_operatingsystem {
# Read the command output through a pipe
4105 if (open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv") {
# Lines have the form "<key>;<value>"
4110 my ($key, $val) = split /;/xms;
4111 $key =~ s{\s+\z}{}xms; # remove trailing whitespace
4112 $val =~ s{\s+\z}{}xms; # remove trailing whitespace
4113 if ($key eq 'Operating System') {
4114 $sysinfo{osname} = $val;
4116 elsif ($key eq 'Operating System Version') {
4117 $sysinfo{osver} = $val;
4125 # Fetch output from 'omreport about', put in sysinfo hash
4127 sub get_omreport_about {
# Read the semicolon-separated (ssv) command output through a pipe
4128 if (open my $OM, '-|', "$omreport about -fmt ssv") {
# The line of interest has the form "Version;<value>"
4132 if (m/\A Version;(.+) \z/xms) {
#
# Fetch chassis info via SNMP, put in sysinfo hash
#
# Walks the chassis information table and stores the model name (with
# trailing whitespace removed), the service tag and the system revision
# (with a leading space) in $sysinfo{model}, $sysinfo{serial} and
# $sysinfo{rev}. A failed walk is reported as UNKNOWN in the 'other'
# category.
#
sub get_snmp_chassis_info {
    my %chassis_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.10.1.9.1'  => 'chassisModelName',
         '1.3.6.1.4.1.674.10892.1.300.10.1.11.1' => 'chassisServiceTagName',
         '1.3.6.1.4.1.674.10892.1.300.10.1.48.1' => 'chassisSystemRevisionName',
        );

    my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
    my $result = $snmp_session->get_table(-baseoid => $chassisInformationTable);

    # Bail out early when the walk failed
    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    # Pick the wanted values out of the table
  WANTED:
    foreach my $oid (keys %chassis_oid) {
        next WANTED if !exists $result->{$oid};

        my $field = $chassis_oid{$oid};
        my $value = $result->{$oid};

        if ($field eq 'chassisModelName') {
            $sysinfo{model} = $value;
            $sysinfo{model} =~ s{\s+\z}{}xms;  # remove trailing whitespace
        }
        elsif ($field eq 'chassisServiceTagName') {
            $sysinfo{serial} = $value;
        }
        elsif ($field eq 'chassisSystemRevisionName') {
            $sysinfo{rev} = q{ } . $value;
        }
    }

    return;
}
#
# Fetch BIOS info via SNMP, put in sysinfo hash
#
# Walks the system BIOS table and stores the BIOS version in
# $sysinfo{bios} and the release date in $sysinfo{biosdate}. A date
# that begins with YYYYMMDD is rewritten as MM/DD/YYYY. A failed walk
# is reported as UNKNOWN in the 'other' category.
#
sub get_snmp_chassis_bios {
    my %bios_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
         '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
        );

    my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
    my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);

    # Bail out early when the walk failed
    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    # Pick the wanted values out of the table
  WANTED:
    foreach my $oid (keys %bios_oid) {
        next WANTED if !exists $result->{$oid};

        my $field = $bios_oid{$oid};

        if ($field eq 'systemBIOSReleaseDateName') {
            $sysinfo{biosdate} = $result->{$oid};
            $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
        }
        elsif ($field eq 'systemBIOSVersionName') {
            $sysinfo{bios} = $result->{$oid};
        }
    }

    return;
}
#
# Fetch OS info via SNMP, put in sysinfo hash
#
# Walks the operating system table and stores the OS name in
# $sysinfo{osname} and the OS version in $sysinfo{osver}. A failed walk
# is reported as UNKNOWN in the 'other' category.
#
sub get_snmp_system_operatingsystem {
    my %os_oid
      = (
         '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
         '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
        );

    my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
    my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);

    # Bail out early when the walk failed
    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting OS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    # Pick the wanted values out of the table
  WANTED:
    foreach my $oid (keys %os_oid) {
        next WANTED if !exists $result->{$oid};

        my $field = $os_oid{$oid};

        if ($field eq 'operatingSystemOperatingSystemName') {
            $sysinfo{osname} = $result->{$oid};
        }
        elsif ($field eq 'operatingSystemOperatingSystemVersionName') {
            $sysinfo{osver} = $result->{$oid};
        }
    }

    return;
}
#
# Fetch OMSA version via SNMP, put in sysinfo hash
#
# Stores the version string in $sysinfo{om}, or the text 'unknown' when
# the answer lacks the value or the value is the empty string. A failed
# request is reported as UNKNOWN in the 'other' category.
#
sub get_snmp_about {
    # systemManagementSoftwareGlobalVersionName
    my $oid = '1.3.6.1.4.1.674.10892.1.100.10.0';
    my $result = $snmp_session->get_request(-varbindlist => [$oid]);

    # Bail out early when the request failed
    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR: Getting OMSA version failed: %s', $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $result->{$oid} && $result->{$oid} ne q{}) {
        $sysinfo{om} = $result->{$oid};
    }
    else {
        $sysinfo{om} = 'unknown';
    }

    return;
}
4260 # Collects some information about the system
# Each piece of information is fetched via SNMP or via omreport,
# depending on $snmp, and only when the selected options need it.
4264 # Get system model and serial number
4265 $snmp ? get_snmp_chassis_info() : get_omreport_chassis_info();
4267 # Get BIOS information. Only if needed
# (needed for okinfo level 1 and up, or when the post message contains
# %b or %d)
4268 if ( $opt{okinfo} >= 1
4270 or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) ) {
4271 $snmp ? get_snmp_chassis_bios() : get_omreport_chassis_bios();
4274 # Get OMSA information. Only if needed
# (needed for okinfo level 3 and up, or in debug mode)
4275 if ($opt{okinfo} >= 3 or $opt{debug}) {
4276 $snmp ? get_snmp_about() : get_omreport_about();
4279 # Return now if debug
4280 return if $opt{debug};
4282 # Get OS information. Only if needed
# (needed when the post message contains %o or %r)
4283 if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
4284 $snmp ? get_snmp_system_operatingsystem() : get_omreport_system_operatingsystem();
#
# Helper function for running omreport when the results are strictly
# "key;value" lines.
#
# Runs "omreport <command> -fmt ssv" with stderr merged into stdout and
# returns a reference to a hash that maps the first field of every line
# containing a semicolon to its second field. A line that starts with
# "Error" is additionally reported as UNKNOWN in the 'other' category.
#
sub run_omreport_info {
    my $command = shift;
    my %info    = ();

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");

    # Parse output, one line at a time
  LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        if ($line =~ m/\A Error/xms) {
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than two fields
        next LINE if $line !~ m/;/xms;

        # Split without a limit: trailing empty fields are dropped, so a
        # line such as "key;" yields an undefined value
        my @fields = split m/;/xms, $line;
        $info{$fields[0]} = $fields[1];
    }

    # Finally, return the collected information
    return \%info;
}
# Get various firmware information (BMC, RAC)
#
# Fills %sysinfo with 'bmc' and 'bmc_fw' for a baseboard management
# controller, and with 'rac', 'rac_name' and 'rac_fw' for a remote
# access controller or integrated management controller. The data is
# read from the SNMP firmware table, or from "omreport <chassis> info"
# when SNMP is not in use. A missing firmware table is not an error.
# Returns nothing.
sub get_firmware_info {
    my @snmp_output = ();
    my %nrpe_output = ();

    if ($snmp) {
        my %fw_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.60.1.7.1'  => 'firmwareType',
             '1.3.6.1.4.1.674.10892.1.300.60.1.8.1'  => 'firmwareTypeName',
             '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
            );

        my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
        my $result = $snmp_session->get_table(-baseoid => $firmwareTable);

        # Some don't have this OID, this is ok
        return if !defined $result;

        @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
    }
    else {
        %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
    }

    my %fw_type  # Firmware types
      = (
         1  => 'other',                              # other than following values
         2  => 'unknown',                            # unknown
         3  => 'systemBIOS',                         # System BIOS
         4  => 'embeddedSystemManagementController', # Embedded System Management Controller
         5  => 'powerSupplyParallelingBoard',        # Power Supply Paralleling Board
         6  => 'systemBackPlane',                    # System (Primary) Backplane
         7  => 'powerVault2XXSKernel',               # PowerVault 2XXS Kernel
         8  => 'powerVault2XXSApplication',          # PowerVault 2XXS Application
         9  => 'frontPanel',                         # Front Panel Controller
         10 => 'baseboardManagementController',      # Baseboard Management Controller
         11 => 'hotPlugPCI',                         # Hot Plug PCI Controller
         12 => 'sensorData',                         # Sensor Data Records
         13 => 'peripheralBay',                      # Peripheral Bay Backplane
         14 => 'secondaryBackPlane',                 # Secondary Backplane for ESM 2 systems
         15 => 'secondaryBackPlaneESM3And4',         # Secondary Backplane for ESM 3 and 4 systems
         16 => 'rac',                                # Remote Access Controller
         17 => 'imc'                                 # Integrated Management Controller
        );

    if ($snmp) {
        foreach my $out (@snmp_output) {
            # A missing type or one that is not in the table maps to the
            # empty string, so the comparisons below never see undef
            my $code = $out->{firmwareType};
            my $type = (defined $code and exists $fw_type{$code})
              ? $fw_type{$code} : q{};

            if ($type eq 'baseboardManagementController') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
            }
            # The alternation is grouped so that both anchors apply to
            # both alternatives; ungrouped, "\A rac|imc \z" means
            # "starts with rac" or "ends with imc"
            elsif ($type =~ m{\A (?:rac|imc) \z}xms) {
                my $name = $out->{firmwareTypeName};
                $name =~ s/\s//gxms if defined $name;
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_name'} = $name;
                $sysinfo{'rac_fw'}   = $out->{firmwareVersionName};
            }
        }
    }
    else {
      KEY:
        foreach my $key (keys %nrpe_output) {
            next KEY if !defined $nrpe_output{$key};

            if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $nrpe_output{$key};
            }
            # The controller name is the model ("DRAC" or "iDRAC")
            # followed directly by the optional generation digit
            elsif (my ($model, $generation)
                   = $key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_fw'}   = $nrpe_output{$key};
                $sysinfo{'rac_name'} = $model . $generation;
            }
        }
    }

    return;
}
4400 #=====================================================================
4402 #=====================================================================
4404 # Here we do the actual checking of components
4405 # Check global status if applicable
# The result is kept in $globalstatus
4407 $globalstatus = check_global();
4410 # Do multiple selected checks
# Each check runs only when its entry in %check is true
4411 if ($check{storage}) { check_storage(); }
4412 if ($check{memory}) { check_memory(); }
4413 if ($check{fans}) { check_fans(); }
4414 if ($check{power}) { check_powersupplies(); }
4415 if ($check{temp}) { check_temperatures(); }
4416 if ($check{cpu}) { check_processors(); }
4417 if ($check{voltage}) { check_volts(); }
4418 if ($check{batteries}) { check_batteries(); }
4419 if ($check{amperage}) { check_pwrmonitoring(); }
4420 if ($check{intrusion}) { check_intrusion(); }
4421 if ($check{alertlog}) { check_alertlog(); }
4422 if ($check{esmlog}) { check_esmlog(); }
4423 if ($check{esmhealth}) { check_esmlog_health(); }
4426 #---------------------------------------------------------------------
4428 #---------------------------------------------------------------------
4439 # Get system information
4442 # Get firmware info if requested via option
4443 if ($opt{okinfo} >= 1) {
4444 get_firmware_info();
4447 # Close SNMP session
4449 $snmp_session->close;
# Header of the tabular report: system model and revision, service tag,
# OMSA version, BIOS version and date, and the plugin version. The
# second column is aligned by padding the first one to 25 characters.
4454 print " System: $sysinfo{model}$sysinfo{rev}\n";
4455 print " ServiceTag: $sysinfo{serial}";
4456 print q{ } x (25 - length $sysinfo{serial}), "OMSA version: $sysinfo{om}\n";
4457 print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}";
4458 print q{ } x (25 - length "$sysinfo{bios} $sysinfo{biosdate}"), "Plugin version: $VERSION\n";
# Storage table: printed only when storage messages exist. State and ID
# are right-aligned in 8-character columns.
4459 if ($#report_storage >= 0) {
4460 print "-----------------------------------------------------------------------------\n";
4461 print " Storage Components \n";
4462 print "=============================================================================\n";
4463 print " STATE | ID | MESSAGE TEXT \n";
4464 print "---------+----------+--------------------------------------------------------\n";
4465 foreach (@report_storage) {
# Each entry is an array reference: message, level, ID
4466 my ($msg, $level, $nexus) = @{$_};
4467 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
4468 . q{ } x (8 - length $nexus) . "$nexus | $msg\n";
# Count the message under the name of its level
4469 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Chassis table: same layout, but the ID column is 4 characters wide
4472 if ($#report_chassis >= 0) {
4473 print "-----------------------------------------------------------------------------\n";
4474 print " Chassis Components \n";
4475 print "=============================================================================\n";
4476 print " STATE | ID | MESSAGE TEXT \n";
4477 print "---------+------+------------------------------------------------------------\n";
4478 foreach (@report_chassis) {
4479 my ($msg, $level, $nexus) = @{$_};
4480 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
4481 . q{ } x (4 - length $nexus) . "$nexus | $msg\n";
4482 $nagios_alert_count{$reverse_exitcode{$level}}++;
# Other messages: no ID column
4485 if ($#report_other >= 0) {
4486 print "-----------------------------------------------------------------------------\n";
4487 print " Other messages \n";
4488 print "=============================================================================\n";
4489 print " STATE | MESSAGE TEXT \n";
4490 print "---------+-------------------------------------------------------------------\n";
4491 foreach (@report_other) {
4492 my ($msg, $level, $nexus) = @{$_};
4493 print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | $msg\n";
4494 $nagios_alert_count{$reverse_exitcode{$level}}++;
my $c = 0;  # number of alerts printed so far, decides when a linebreak is needed

# Run through each message, sorted by level with the highest level
# first. The sort block is a real three-way comparison (<=>): a boolean
# "<" tells sort() that a pair is "greater" one way round and "equal"
# the other way round, and the resulting order is then not well-defined.
ALERT:
foreach my $alert (sort { $b->[1] <=> $a->[1] }
                   (@report_storage, @report_chassis, @report_other)) {
    my ($msg, $level) = @{ $alert };
    next ALERT if $level == $E_OK;

    if (defined $opt{only}) {
        # If user wants only critical alerts
        next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);

        # If user wants only warning alerts
        next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
    }

    # Prefix with service tag if specified with option '-i|--info'
    # NOTE(review): the guarding condition is not visible in the listing
    # this was written from; $opt{info} is assumed from the option name
    # in the comment above -- confirm
    if ($opt{info}) {
        if (defined $opt{htmlinfo}) {
            $msg = '[<a href="' . warranty_url($sysinfo{serial})
              . "\">$sysinfo{serial}</a>] " . $msg;
        }
        else {
            $msg = "[$sysinfo{serial}] " . $msg;
        }
    }

    # Prefix with nagios level if specified with option '--state'
    $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

    # Prefix with one-letter nagios level if specified with option '--short-state'
    $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

    # Every alert but the first is preceded by the linebreak string
    print $linebreak if $c++ > 0;
    print $msg;

    # Count the alert under the name of its level
    $nagios_alert_count{$reverse_exitcode{$level}}++;
}
# Determine our exit code from the per-state alert tally.  CRITICAL takes
# precedence over WARNING, which takes precedence over UNKNOWN; with no
# alerts at all the result is OK.  A single expression guarantees that
# $exit_code always holds a defined value before it is compared with '=='.
$exit_code = $nagios_alert_count{'CRITICAL'} > 0 ? $E_CRITICAL
           : $nagios_alert_count{'WARNING'}  > 0 ? $E_WARNING
           : $nagios_alert_count{'UNKNOWN'}  > 0 ? $E_UNKNOWN
           :                                       $E_OK;
# Global status via SNMP.. extra safety check.  If the global health
# status is not OK while no component raised an alert (and the user did
# not restrict the check with --only), report the mismatch.
if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
    print "OOPS! Something is wrong with this server, but I don't know what. ";
    print "The global system health status is $reverse_exitcode{$globalstatus}, ";
    print "but every component check is OK. This may be a bug in the Nagios plugin, ";
    print "please file a bug report.\n";

    # The message is complete and newline-terminated.  Stop here with
    # UNKNOWN so that the regular OK message is not printed after it and
    # the plugin does not exit with OK.
    exit $E_UNKNOWN;
}
# Print the OK message.
#
# With '--only <component>' (any value other than critical, warning or
# chassis) a short component-specific summary is printed.  Otherwise the
# general system summary is printed, optionally followed by extra
# firmware/version lines selected by $opt{okinfo} (levels 1, 2 and 3).
# Nothing is printed here in debug mode or when the exit code is not OK.
#
# The alternation in the pattern is grouped so that \A and \z anchor all
# three words, not just the first and the last one.
if ($exit_code == $E_OK && defined $opt{only}
    && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms
    && !$opt{debug}) {

    # One summary line per value of --only
    my %okmsg
      = ( 'storage'   => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
          'fans'      => ($count{fan} == 0 && $blade)
                           ? 'OK - blade system with no fan probes'
                           : "FANS OK - $count{fan} fan probes checked",
          'temp'      => "TEMPERATURES OK - $count{temp} temperature probes checked",
          'memory'    => "MEMORY OK - $count{dimm} memory modules, $count{mem} MB total memory",
          'power'     => $count{power} == 0
                           ? 'OK - no instrumented power supplies found'
                           : "POWER OK - $count{power} power supplies checked",
          'cpu'       => "PROCESSORS OK - $count{cpu} processors checked",
          'voltage'   => "VOLTAGE OK - $count{volt} voltage probes checked",
          'batteries' => $count{bat} == 0
                           ? 'OK - no batteries found'
                           : "BATTERIES OK - $count{bat} batteries checked",
          'amperage'  => $count{amp} == 0
                           ? 'OK - no power monitoring probes found'
                           : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
          'intrusion' => $count{intr} == 0
                           ? 'OK - no intrusion detection probes found'
                           : "INTRUSION OK - $count{intr} intrusion detection probes checked",
          'alertlog'  => $snmp
                           ? 'OK - not supported via snmp'
                           : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
          'esmlog'    => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
          'esmhealth' => "ESM LOG OK - less than 80% used",
        );

    print $okmsg{$opt{only}};
}
elsif ($exit_code == $E_OK && !$opt{debug}) {
    # System model, revision and serial number, as links when HTML
    # output is requested
    if (defined $opt{htmlinfo}) {
        printf q{OK - System: '<a href="%s">%s%s</a>', SN: '<a href="%s">%s</a>'},
          documentation_url($sysinfo{model}), $sysinfo{model}, $sysinfo{rev},
          warranty_url($sysinfo{serial}), $sysinfo{serial};
    }
    else {
        printf q{OK - System: '%s%s', SN: '%s'},
          $sysinfo{model}, $sysinfo{rev}, $sysinfo{serial};
    }

    # Memory summary; amounts of 1024 MB or more are shown in GB.
    # Note that $count{mem} itself is converted in place.
    if ($check{memory}) {
        my $unit = 'MB';
        if ($count{mem} >= 1024) {
            $count{mem} /= 1024;
            $unit = 'GB';
        }
        printf ', %d %s ram (%d dimms)', $count{mem}, $unit, $count{dimm};
    }
    else {
        print ', not checking memory';
    }

    # Storage summary
    if ($check{storage}) {
        printf ', %d logical drives, %d physical drives',
          $count{vdisk}, $count{pdisk};
    }
    else {
        print ', not checking storage';
    }

    # Level 1: BIOS, plus remote access controller and BMC firmware when
    # present.  Like every other extra line, it starts with the line
    # separator so it does not run into the summary above.
    if ($opt{okinfo} >= 1) {
        print $linebreak;
        printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};

        if ($sysinfo{rac}) {
            printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
        }
        if ($sysinfo{bmc}) {
            printf q{, BMC='%s'}, $sysinfo{bmc_fw};
        }
    }

    # Level 2: one line per storage controller and per enclosure
    if ($opt{okinfo} >= 2) {
        if ($check{storage}) {
            my @storageprint = ();

            foreach my $id (sort keys %{ $sysinfo{controller} }) {
                my $ctrl = $sysinfo{controller}{$id};
                chomp $ctrl->{driver};
                my $msg = sprintf q{----- Ctrl %s [%s]: Fw='%s', Dr='%s'},
                  $ctrl->{id}, $ctrl->{name}, $ctrl->{firmware}, $ctrl->{driver};
                if (defined $ctrl->{storport}) {
                    $msg .= sprintf q{, Storport: '%s'}, $ctrl->{storport};
                }
                push @storageprint, $msg;
            }

            foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
                my $encl = $sysinfo{enclosure}{$id};
                push @storageprint, sprintf q{----- Encl %s [%s]: Fw='%s'},
                  $encl->{id}, $encl->{name}, $encl->{firmware};
            }

            foreach my $line (@storageprint) {
                print $linebreak, $line;
            }
        }
    }

    # Level 3: OMSA version
    if ($opt{okinfo} >= 3) {
        print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
    }
}
# Extended info: an extra line with system model, revision and serial
# number.  The line starts with the line separator so that it does not
# run into whatever was printed before it.  With HTML output the model
# links to the documentation page and the serial to the warranty page.
if ($opt{extinfo}) {
    print $linebreak;

    if (defined $opt{htmlinfo}) {
        printf '------ SYSTEM: <a href="%s">%s%s</a>, SN: <a href="%s">%s</a>',
          documentation_url($sysinfo{model}), $sysinfo{model}, $sysinfo{rev},
          warranty_url($sysinfo{serial}), $sysinfo{serial};
    }
    else {
        printf '------ SYSTEM: %s%s, SN: %s',
          $sysinfo{model}, $sysinfo{rev}, $sysinfo{serial};
    }
}
# Print the optional post message.  $opt{postmsg} is either the message
# itself or the name of a regular file whose first line is the message.
#
# Placeholders expanded in the message:
#   %s serial number       %m model and revision   %b BIOS version
#   %d BIOS date           %o OS name              %r OS version
#   %p physical drives     %l logical drives       %n line separator
#   %% a literal percent sign
if (defined $opt{postmsg}) {
    my $post = undef;

    if (-f $opt{postmsg}) {
        open my $POST, '<', $opt{postmsg}
          or do {
              print $linebreak;
              print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n";
              exit $E_UNKNOWN;
          };
        $post = <$POST>;               # only the first line is used
        close $POST;
        chomp $post if defined $post;  # undef when the file is empty
    }
    else {
        $post = $opt{postmsg};
    }

    if (defined $post) {
        # Undefined values expand to the empty string
        my %expand
          = ( 's' => $sysinfo{serial},
              'm' => join(q{}, grep { defined } @sysinfo{qw(model rev)}),
              'b' => $sysinfo{bios},
              'd' => $sysinfo{biosdate},
              'o' => $sysinfo{osname},
              'r' => $sysinfo{osver},
              'p' => $count{pdisk},
              'l' => $count{vdisk},
              'n' => $linebreak,
              '%' => q{%},
            );

        # Expand everything in a single pass.  Running one substitution
        # per placeholder would turn '%%s' into the serial number instead
        # of a literal '%s', and would expand placeholder-like text again
        # when it occurs inside an already inserted value.
        $post =~ s{ [%] ([smbdorpln%]) }
                  { defined $expand{$1} ? $expand{$1} : q{} }gexms;

        print $linebreak;
        print $post;
    }
}
# Reset the WARN signal
$SIG{__WARN__} = 'DEFAULT';

# Print any perl warnings that have occurred.  Each element of
# @perl_warnings is an array ref with the arguments the warning handler
# received.  Any collected warning turns the result into UNKNOWN.
if (@perl_warnings) {
    foreach my $warning (@perl_warnings) {
        # Work on a copy and strip the trailing newline of the warning
        # text, so the output stays on one logical line
        my @text = @{$warning};
        chomp @text;
        print "${linebreak}INTERNAL ERROR: @text";
    }
    $exit_code = $E_UNKNOWN;
}
# Print performance data.  Each element of @perfdata is a hash ref with
# the keys label, mini, value, warn and crit.  $opt{perfdata} selects the
# format: 'multiline' puts each item on its own line, 'minimal' uses the
# short 'mini' names instead of the full labels.
if (defined $opt{perfdata} && !$opt{debug} && @perfdata) {
    my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ };  # line break for perfdata

    # Nagios separates the text output from the performance data with a
    # pipe character
    print q{|};

    # Sort routine for performance data: group by the first three
    # characters of the label (fans, power, temperatures, enclosures),
    # then order by label within a group.  The ranks are numbers and are
    # compared numerically; labels with an unlisted prefix sort first.
    sub perfsort {
        my %order = ( fan => 0, pwr => 1, tem => 2, enc => 3, );
        my ($rank_a, $rank_b)
          = map { my $prefix = substr $_->{label}, 0, 3;
                  exists $order{$prefix} ? $order{$prefix} : -1 } ($a, $b);
        return ($rank_a <=> $rank_b) || ($a->{label} cmp $b->{label});
    }

    # Print performance data sorted
    my $type = $opt{perfdata} eq 'minimal' ? 'mini' : 'label';
    print join $lb,
      map { "$_->{$type}=$_->{value};$_->{warn};$_->{crit}" }
      sort perfsort @perfdata;
}

# Print a linebreak at the end
print "\n" if !$opt{debug};
4723 # Exit with proper exit code