]> git.uio.no Git - check_openmanage.git/blame - check_openmanage
* version 3.5.0-beta17
[check_openmanage.git] / check_openmanage
CommitLineData
669797e1 1#!/usr/bin/perl
2#
3# Nagios plugin
4#
5# Monitor Dell server hardware status using Dell OpenManage Server
6# Administrator, either locally via NRPE, or remotely via SNMP.
7#
8# $Id$
9#
10# Copyright (C) 2009 Trond H. Amundsen
11#
12# This program is free software: you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation, either version 3 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program. If not, see <http://www.gnu.org/licenses/>.
24#
25
26require 5.006; # Perl v5.6.0 or newer is required
27use strict;
28use warnings;
29use POSIX qw(isatty ceil);
30use Getopt::Long qw(:config no_ignore_case);
31
32# Global (package) variables used throughout the code
33use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL
34 $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE
35 $snmp_session $snmp_error $omreport $globalstatus $global
36 $linebreak $omopt_chassis $omopt_system $blade
37 $exit_code $snmp
38 %check %opt %perfdata %reverse_exitcode %status2nagios
39 %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo
40 %blacklist %nagios_alert_count %count
41 @controllers @enclosures
42 @report_storage @report_chassis @report_other
43 );
44
45#---------------------------------------------------------------------
46# Initialization and global variables
47#---------------------------------------------------------------------
48
# When STDOUT is not attached to a terminal the plugin is most likely
# being run by Nagios, which ignores anything written to STDERR. In
# that case everything sent to STDERR is redirected to STDOUT.
if (!isatty(*STDOUT)) {
    my $redirected = open(STDERR, '>&', 'STDOUT');
    if (!$redirected) {
        print "ERROR: Couldn't redirect STDERR to STDOUT\n";
        exit 2;
    }
}
56
# Plugin identity: name, version, author and contact address
($NAME,   $VERSION) = ('check_openmanage',  '3.5.0-beta17');
($AUTHOR, $CONTACT) = ('Trond H. Amundsen', 't.h.amundsen@usit.uio.no');

# Exit codes, in the order OK, WARNING, CRITICAL, UNKNOWN
($E_OK, $E_WARNING, $E_CRITICAL, $E_UNKNOWN) = (0, 1, 2, 3);

# Lock file that is present while a firmware update is running
# [FIXME: location on Windows?]
$FW_LOCK = '/var/lock/.spsetup';  # default on Linux
71
# Usage text, printed on option errors and in front of the help text
$USAGE = <<"END_USAGE";
Usage: $NAME [OPTION]...
END_USAGE

# Help text. Not interpolated. The long option names listed here must
# match the option specifications given to GetOptions(); the verbosity
# option is declared there as "ok-info", so that is the spelling shown.
$HELP = <<'END_HELP';

GENERAL OPTIONS:

 -p, --perfdata Output performance data
 -t, --timeout Plugin timeout in seconds
 -c, --critical Customise temperature critical limits
 -w, --warning Customise temperature warning limits
 -d, --debug Debug output, reports everything
 -h, --help Display this help text
 -V, --version Display version info

SNMP OPTIONS:

 -H, --hostname Hostname or IP of the server (needed for SNMP)
 -C, --community SNMP community string
 -P, --protocol SNMP protocol version
 --port SNMP port number

OUTPUT OPTIONS:

 -i, --info Prefix any alerts with the service tag
 -e, --extinfo Append system info to alerts
 -s, --state Prefix alerts with alert state
 --short-state Prefix alerts with alert state (abbreviated)
 -o, --ok-info Verbosity when check result is OK
 --htmlinfo HTML output with clickable links

CHECK CONTROL AND BLACKLISTING:

 -a, --all Check everything, even log content
 -b, --blacklist Blacklist missing and/or failed components
 --only Only check a certain component or alert type
 --check Fine-tune which components are checked

For more information and advanced options, see the manual page or URL:
 http://folk.uio.no/trondham/software/check_openmanage.html
END_HELP

# Version and license text, printed for the --version option
$LICENSE = <<"END_LICENSE";
$NAME $VERSION
Copyright (C) 2009 $AUTHOR
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.

Written by $AUTHOR <$CONTACT>
END_LICENSE
127
# Options with default values. The keys used here are the same keys
# that GetOptions() below stores into; in particular the abbreviated
# state option lives under "shortstate", not "short-state".
%opt = ( 'blacklist'    => [],
         'check'        => [],
         'critical'     => [],
         'warning'      => [],
         'timeout'      => 30,  # default timeout is 30 seconds
         'debug'        => 0,
         'help'         => 0,
         'perfdata'     => undef,
         'info'         => 0,
         'extinfo'      => 0,
         'htmlinfo'     => undef,
         'postmsg'      => undef,
         'state'        => 0,
         'shortstate'   => 0,   # set by --short-state
         'okinfo'       => 0,   # default "ok" output level
         'linebreak'    => undef,
         'version'      => 0,
         'all'          => 0,
         'only'         => undef,
         'omreport'     => undef,
         'port'         => 161, # default SNMP port
         'hostname'     => undef,
         'community'    => 'public',  # SNMP v1 or v2c
         'protocol'     => 2,
         'username'     => undef, # SNMP v3
         'authpassword' => undef, # SNMP v3
         'authkey'      => undef, # SNMP v3
         'authprotocol' => undef, # SNMP v3
         'privpassword' => undef, # SNMP v3
         'privkey'      => undef, # SNMP v3
         'privprotocol' => undef, # SNMP v3
       );

# Get options. On a parsing error the usage text is printed and the
# plugin exits with status UNKNOWN.
GetOptions('b|blacklist=s' => \@{ $opt{blacklist} },
           'check=s'       => \@{ $opt{check} },
           'c|critical=s'  => \@{ $opt{critical} },
           'w|warning=s'   => \@{ $opt{warning} },
           't|timeout=i'   => \$opt{timeout},
           'd|debug'       => \$opt{debug},
           'h|help'        => \$opt{help},
           'V|version'     => \$opt{version},
           'p|perfdata:s'  => \$opt{perfdata},
           'i|info'        => \$opt{info},
           'e|extinfo'     => \$opt{extinfo},
           'htmlinfo:s'    => \$opt{htmlinfo},
           'postmsg=s'     => \$opt{postmsg},
           's|state'       => \$opt{state},
           'short-state'   => \$opt{shortstate},
           'o|ok-info=i'   => \$opt{okinfo},
           'l|linebreak=s' => \$opt{linebreak},
           'a|all'         => \$opt{all},
           'only=s'        => \$opt{only},
           'omreport=s'    => \$opt{omreport},
           'port=i'        => \$opt{port},
           'H|hostname=s'  => \$opt{hostname},
           'C|community=s' => \$opt{community},
           'P|protocol=i'  => \$opt{protocol},
           'U|username=s'  => \$opt{username},
           'authpassword=s' => \$opt{authpassword},
           'authkey=s'      => \$opt{authkey},
           'authprotocol=s' => \$opt{authprotocol},
           'privpassword=s' => \$opt{privpassword},
           'privkey=s'      => \$opt{privkey},
           'privprotocol=s' => \$opt{privprotocol},
          ) or do { print $USAGE; exit $E_UNKNOWN };
195
# Informational options: print the requested text and leave with
# status OK. A help request takes precedence over a version request.
if ($opt{help}) {
    print $USAGE, $HELP;
    exit $E_OK;
}
elsif ($opt{version}) {
    print $LICENSE;
    exit $E_OK;
}

# Arm the plugin timeout: when the alarm fires, say so and exit with
# status UNKNOWN
$SIG{ALRM} = sub {
    print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
    exit $E_UNKNOWN;
};
alarm $opt{timeout};

# SNMP mode is selected by giving a hostname
$snmp = 0;
if (defined $opt{hostname}) {
    $snmp = 1;
}

# The SNMP session and its error string, and the omreport command,
# are all unset until they are initialized later
($snmp_session, $snmp_error) = (undef, undef);
undef $omreport;
224
# Check flags, override available with the --check option. Enabled by
# default: storage subsystem, memory (dimms), fans, power supplies,
# temperature, processors, voltage, battery probes, power consumption
# (amperage), intrusion detection and the ESM log overall health.
%check = map { $_ => 1 } qw(storage memory fans power temp cpu voltage
                            batteries amperage intrusion esmhealth);

# Disabled by default: content of the alert log and of the ESM
# (hardware) log
@check{qw(alertlog esmlog)} = (0, 0);

# Default line break: a newline on a terminal, otherwise an HTML break
$linebreak = isatty(*STDOUT) ? "\n" : '<br/>';

# Line break from option: the keywords REG and HTML select the two
# predefined breaks, anything else is used verbatim
if (defined $opt{linebreak}) {
    my %named_break = ( 'REG' => "\n", 'HTML' => '<br/>' );
    $linebreak = exists $named_break{ $opt{linebreak} }
               ? $named_break{ $opt{linebreak} }
               : $opt{linebreak};
}

# Exit with status=UNKNOWN if there is firmware upgrade in progress.
# Only relevant when checking locally, i.e. not via SNMP.
if (not $snmp and -f $FW_LOCK) {
    print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
    exit $E_UNKNOWN;
}
262
# Controllers and enclosures found so far
@controllers = ();
@enclosures  = ();

# Collected messages with their associated nagios level, one list
# each for storage, chassis and everything else
@report_storage = ();
@report_chassis = ();
@report_other   = ();

# Counters for everything. The plain counters are: physical disks,
# logical drives (virtual disks), temperature probes, voltage probes,
# amperage probes, intrusion probes, memory modules, fan probes, CPUs,
# batteries and power supplies. The ESM log and the alert log each get
# one counter per severity (critical, warning, ok).
%count
  = (
     ( map { $_ => 0 } qw(pdisk vdisk temp volt amp intr
                          dimm fan cpu bat power) ),
     'esm'   => { 'Critical' => 0, 'Non-Critical' => 0, 'Ok' => 0 },
     'alert' => { 'Critical' => 0, 'Non-Critical' => 0, 'Ok' => 0 },
    );

# Performance data
%perfdata = ();
300
# Global health status: checked by default, and assumed "OK" until
# something else is found
$global       = 1;
$globalstatus = $E_OK;

# Nagios error levels reversed (exit code => name)
%reverse_exitcode = ();
@reverse_exitcode{ $E_OK, $E_WARNING, $E_CRITICAL, $E_UNKNOWN }
  = qw(OK WARNING CRITICAL UNKNOWN);

# OpenManage (omreport) and SNMP error levels. Everything except
# "Ok" and "Non-Critical" is treated as critical.
%status2nagios
  = (
     ( map { $_ => $E_CRITICAL } qw(Unknown Critical Non-Recoverable Other) ),
     'Non-Critical' => $E_WARNING,
     'Ok'           => $E_OK,
    );

# Status via SNMP: the numeric values 1..6 in this order
%snmp_status = do {
    my @names = qw(Other Unknown Ok Non-Critical Critical Non-Recoverable);
    map { ( $_ + 1 => $names[$_] ) } 0 .. $#names;
};

# Probe Status via SNMP: the numeric values 1..10 in this order
%snmp_probestatus = do {
    my @names
      = (
         'Other',                # probe status is not one of the following:
         'Unknown',              # probe status is unknown (not known or monitored)
         'Ok',                   # probe is reporting a value within the thresholds
         'nonCriticalUpper',     # probe has crossed upper noncritical threshold
         'criticalUpper',        # probe has crossed upper critical threshold
         'nonRecoverableUpper',  # probe has crossed upper non-recoverable threshold
         'nonCriticalLower',     # probe has crossed lower noncritical threshold
         'criticalLower',        # probe has crossed lower critical threshold
         'nonRecoverableLower',  # probe has crossed lower non-recoverable threshold
         'failed',               # probe is not functional
        );
    map { ( $_ + 1 => $names[$_] ) } 0 .. $#names;
};

# Probe status translated to Nagios alarm levels. Only the two
# noncritical threshold states are warnings.
%probestatus2nagios
  = (
     ( map { $_ => $E_CRITICAL } qw(Other Unknown criticalUpper
                                    nonRecoverableUpper criticalLower
                                    nonRecoverableLower failed) ),
     ( map { $_ => $E_WARNING } qw(nonCriticalUpper nonCriticalLower) ),
     'Ok' => $E_OK,
    );

# System information gathered. Text fields start out as 'N/A': BIOS
# version, BIOS release date, serial number (service tag), system
# model, OS name, OS version, OMSA version, remote access controller
# (RAC) name, BMC firmware and RAC firmware.
%sysinfo
  = (
     ( map { $_ => 'N/A' } qw(bios biosdate serial model osname osver
                              om rac_name bmc_fw rac_fw) ),
     'bmc' => 0,  # HAS baseboard management controller (BMC)
     'rac' => 0,  # HAS remote access controller (RAC)
    );
382
# Adjust which checks to perform
if (defined $opt{check}) {
    adjust_checks();
}

# Blacklisted components
%blacklist = ();
if (defined $opt{blacklist}) {
    %blacklist = %{ get_blacklist() };
}

# If blacklisting is in effect, don't check global health status
$global = 0 if %blacklist;

# Take into account new hardware and blades
$omopt_chassis = 'chassis';  # default "chassis" option to omreport
$omopt_system  = 'system';   # default "system" option to omreport
$blade         = 0;          # if this is a blade system

# Some initializations and checking before we begin
if (!$snmp) {
    # Find the omreport binary
    find_omreport();
    # Check help output from omreport, see which options are available.
    # Also detecting blade via omreport.
    check_omreport_options();
}
else {
    snmp_initialize();    # initialize SNMP
    snmp_check();         # check that SNMP works
    snmp_detect_blade();  # detect blade via SNMP
}
412
413
414#---------------------------------------------------------------------
415# Helper functions
416#---------------------------------------------------------------------
417
#
# Queue a message on one of the message arrays. The first argument
# selects the array ('storage', 'chassis' or 'other'); the message,
# its nagios level and an optional component id are stored together
# as one entry. A missing id is stored as the empty string. Returns
# the value of push(), i.e. the new number of entries in the array.
#
sub report {
    my ($type, $msg, $exval, $id) = @_;

    my %queue_for
      = (
         'storage' => \@report_storage,
         'chassis' => \@report_chassis,
         'other'   => \@report_other,
        );

    if (!defined $id) {
        $id = q{};
    }

    my @entry = ($msg, $exval, $id);
    return push @{ $queue_for{$type} }, \@entry;
}
434
435
#
# Run a command and collect the lines it prints. Returns a reference
# to an array of those lines. If the command can't be started, or its
# filehandle can't be closed cleanly, a message with level UNKNOWN is
# reported; in the first case the returned array is empty, in the
# second it holds whatever was read.
#
sub run_command {
    my $command = shift;

    my $pipe;
    if (!open($pipe, '-|', $command)) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return [];
    }

    my @output = <$pipe>;

    if (!close($pipe)) {
        report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN);
    }

    return \@output;
}
452
#
# Run a command and return everything it prints as one string. If the
# command can't be started, a message with level UNKNOWN is reported
# and nothing is returned.
#
sub slurp_command {
    my $command = shift;

    my $pipe;
    if (!open($pipe, '-|', $command)) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return;
    }

    # Read all output in one go
    local $/ = undef;
    my $everything = <$pipe>;

    # NOTE: The return value of close() is deliberately not checked,
    # since omreport does something weird sometimes.
    close $pipe;

    return $everything;
}
469
#
# Initialize SNMP. Builds the parameters for Net::SNMP->session() from
# the command line options and opens the session, storing it in
# $snmp_session (and any error text in $snmp_error). Takes no
# arguments and returns nothing. Prints a message and exits with
# status UNKNOWN if the options are unusable, if Net::SNMP can't be
# loaded, or if the session can't be created.
#
sub snmp_initialize {
    # Legal SNMP v3 protocols. The alternatives are grouped so that
    # the anchors apply to each of them; without the group only the
    # first alternative is anchored at the start and only the last one
    # at the end, and any string merely containing e.g. "aes" passes.
    # NOTE(review): "3desede" is the spelling Net::SNMP documents for
    # triple DES; confirm against the installed Net::SNMP version.
    my $snmp_v3_privprotocol = qr{\A (?: des | aes | aes128 | 3des | 3desede ) \z}xms;
    my $snmp_v3_authprotocol = qr{\A (?: md5 | sha ) \z}xms;

    # Parameters to Net::SNMP->session()
    my %param
      = (
         '-port'     => $opt{port},
         '-hostname' => $opt{hostname},
         '-version'  => $opt{protocol},
        );

    # Parameters for SNMP v3
    if ($opt{protocol} == 3) {

        # Username is mandatory
        if (!defined $opt{username}) {
            print "SNMP ERROR: With SNMPv3 the username must be specified\n";
            exit $E_UNKNOWN;
        }
        $param{'-username'} = $opt{username};

        # Passwords and keys are optional and passed on unchanged
        foreach my $name (qw(authpassword authkey privpassword privkey)) {
            if (defined $opt{$name}) {
                $param{"-$name"} = $opt{$name};
            }
        }

        # Privprotocol is optional
        if (defined $opt{privprotocol}) {
            if ($opt{privprotocol} !~ $snmp_v3_privprotocol) {
                print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
                  . "must be one of [des|aes|aes128|3des|3desede]\n";
                exit $E_UNKNOWN;
            }
            $param{'-privprotocol'} = $opt{privprotocol};
        }

        # Authprotocol is optional
        if (defined $opt{authprotocol}) {
            if ($opt{authprotocol} !~ $snmp_v3_authprotocol) {
                print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
                  . "must be one of [md5|sha]\n";
                exit $E_UNKNOWN;
            }
            $param{'-authprotocol'} = $opt{authprotocol};
        }
    }
    # Parameters for SNMP v2c or v1
    elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
        $param{'-community'} = $opt{community};
    }
    else {
        print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
        exit $E_UNKNOWN;
    }

    # Net::SNMP is loaded at run time, so that the module is only
    # needed when the plugin actually runs in SNMP mode
    if ( !eval { require Net::SNMP; 1 } ) {
        print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
        exit $E_UNKNOWN;
    }

    # Try to initialize the SNMP session
    ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
    if (!defined $snmp_session) {
        printf "SNMP: %s\n", $snmp_error;
        exit $E_UNKNOWN;
    }
    return;
}
565
#
# Verify that SNMP works by asking for "chassisModelName", which all
# servers should have. Exits with status CRITICAL if there is no
# answer at all, and with status UNKNOWN if the answer says that the
# OID doesn't exist. Returns nothing otherwise.
#
sub snmp_check {
    my $model_oid = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';  # chassisModelName
    my $reply = $snmp_session->get_request(-varbindlist => [ $model_oid ]);

    # No reply: typically the remote host isn't responding
    unless (defined $reply) {
        printf "SNMP CRITICAL: %s\n", $snmp_session->error;
        exit $E_CRITICAL;
    }

    # A "noSuch..." answer: OpenManage isn't installed or is not working
    my $answer = $reply->{$model_oid};
    if ($answer =~ m{\A noSuch (Instance|Object) \z}xms) {
        print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
        exit $E_UNKNOWN;
    }

    return;
}
587
#
# Detecting blade via SNMP. Asks for "DellBaseBoardType" and sets the
# global $blade to 1 if the answer is '3'. Takes no arguments and
# returns nothing.
#
sub snmp_detect_blade {
    my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
    my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);

    # A failed request gives no result at all. Leave $blade alone in
    # that case, rather than comparing an undefined value and emitting
    # a warning into the plugin output.
    return if !defined $result;

    # Identify blade. Older models (4th and 5th gen models) and/or old
    # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
    # similar, we assume that this isn't a blade
    my $board_type = $result->{$DellBaseBoardType};
    if (defined $board_type and $board_type eq '3') {
        $blade = 1;
    }
    return;
}
603
#
# Locate the omreport binary and store its path in $omreport. A path
# given with the --omreport option is tried first, followed by the
# known default locations; the first one that is executable wins.
# Exits with status UNKNOWN if none of them is usable.
#
sub find_omreport {
    # Possible full paths for omreport
    my @candidates
      = (
         '/usr/bin/omreport',                                        # default on Linux
         '/opt/dell/srvadmin/oma/bin/omreport.sh',                   # alternate on Linux
         '/opt/dell/srvadmin/oma/bin/omreport',                      # alternate on Linux
         'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe',  # default on Windows x64
         'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe',        # default on Windows x32
         'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe',             # 8bit legacy default on Windows x32
         'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe',             # 8bit legacy default on Windows x64
        );

    # A path specified by the user goes in front of the defaults
    if (defined $opt{omreport}) {
        unshift @candidates, $opt{omreport};
    }

    # Find the one to use
    my ($usable) = grep { -x $_ } @candidates;
    if (defined $usable) {
        $omreport = $usable;
    }

    # Exit with status=UNKNOWN if OM is not installed, or we don't
    # have permission to execute the binary
    if (!defined $omreport) {
        print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
        exit $E_UNKNOWN;
    }
    return;
}
643
#
# Looks at the output from 'omreport -?' to see which arguments this
# omreport knows about, to accommodate the deprecated options
# "chassis" and "system" (on newer hardware), as well as blade
# servers. Adjusts $omopt_system, $omopt_chassis and $blade
# accordingly and returns nothing.
#
sub check_omreport_options {
    my $help_lines = run_command("$omreport -? 2>&1");

    foreach my $line (@{ $help_lines }) {
        # If "servermodule" argument to omreport exists, use it
        # instead of argument "system"
        if ($line =~ m/\A servermodule /xms) {
            $omopt_system = 'servermodule';
        }
        # If "mainsystem" argument to omreport exists, use it
        # instead of argument "chassis"
        elsif ($line =~ m/\A mainsystem /xms) {
            $omopt_chassis = 'mainsystem';
        }
        # If "modularenclosure" argument to omreport exists, assume
        # that this is a blade
        elsif ($line =~ m/\A modularenclosure /xms) {
            $blade = 1;
        }
    }
    return;
}
669
#
# Read the blacklist option and return a reference to a hash
# containing the blacklisted components. Each value given to the
# option is either a blacklist string or the name of a file whose
# first line holds such a string. A blacklist string has the form
# "component=id,id/component=id"; the hash maps each component name to
# an array of its ids. An empty hash is returned if nothing is
# blacklisted or if a blacklist file can't be opened (which is also
# reported with level UNKNOWN).
#
sub get_blacklist {
    my @specs  = ();
    my %parsed = ();

    foreach my $item (@{ $opt{blacklist} }) {
        my $spec = $item;
        if (-f $item) {
            open my $BL, '<', $item
              or do { report('other', "Couldn't open blacklist file $item: $!", $E_UNKNOWN)
                        and return {} };
            $spec = <$BL>;
            close $BL;

            # An empty file has no first line and blacklists nothing
            next if !defined $spec;
            chomp $spec;
        }
        push @specs, $spec;
    }

    return {} if !@specs;

    # Parse blacklist string, put in hash
    foreach my $spec (@specs) {
        foreach my $comp (split m{/}xms, $spec) {
            next if $comp !~ m/=/xms;
            my ($key, $val) = split /=/xms, $comp;

            # "=" alone leaves both parts undefined, and "name=" has no
            # ids. Both end up as in the plain code path (empty name,
            # empty id list), only without warnings about undefined
            # values.
            $key = q{} if !defined $key;
            my @vals = defined $val ? split(/,/xms, $val) : ();
            $parsed{$key} = \@vals;
        }
    }

    return \%parsed;
}
711
#
# Read the --all, --only and --check options and adjust the hash
# %check, which is a rough list of components to be checked. May also
# turn off the global health check ($global). Takes no arguments and
# returns nothing. Prints a message and exits with status UNKNOWN if
# the options are combined in an unsupported way.
#
sub adjust_checks {
    my @cl = ();

    # The values "critical" and "warning" for the --only option select
    # an alert type, not a component. The alternatives are grouped so
    # that both of them are anchored at both ends.
    my $alert_type = qr{\A (?: critical | warning ) \z}xms;
    my $only_component
      = (defined $opt{only} and $opt{only} !~ $alert_type) ? 1 : 0;

    # Adjust checking based on the '--all' option
    if ($opt{all}) {
        # Check option usage
        if ($only_component) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
            exit $E_UNKNOWN;
        }
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
            exit $E_UNKNOWN;
        }

        # set the check hash to check everything
        foreach my $component (keys %check) {
            $check{$component} = 1;
        }

        return;
    }

    # Adjust checking based on the '--only' option
    if ($only_component) {
        # Check option usage
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
            exit $E_UNKNOWN;
        }
        if (! exists $check{$opt{only}} and $opt{only} ne 'chassis') {
            print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
            exit $E_UNKNOWN;
        }

        # reset the check hash
        foreach my $component (keys %check) {
            $check{$component} = 0;
        }

        # adjust the check hash
        if ($opt{only} eq 'chassis') {
            foreach my $component (qw(memory fans power temp cpu voltage
                                      batteries amperage intrusion esmhealth)) {
                $check{$component} = 1;
            }
        }
        else {
            $check{$opt{only}} = 1;
        }

        return;
    }

    # Adjust checking based on the '--check' option. Each value is
    # either a check string or the name of a file whose first line
    # holds such a string.
    foreach my $item (@{ $opt{check} }) {
        my $spec = $item;
        if (-f $item) {
            open my $CL, '<', $item
              or do { report('other', "Couldn't open check file $item: $!", $E_UNKNOWN) and return };
            $spec = <$CL>;
            close $CL;

            # An empty file has no first line and changes nothing
            next if !defined $spec;

            # Without this the last value on the line keeps its
            # newline, and "0\n" counts as true in boolean context
            chomp $spec;
        }
        push @cl, $spec;
    }

    return if !@cl;

    # Parse checklist string, put in hash
    foreach my $spec (@cl) {
        foreach my $c (split /,/xms, $spec) {
            next if $c !~ m/=/xms;
            my ($key, $val) = split /=/xms, $c;
            $check{$key} = $val;
        }
    }

    # Check if we should check global health status
    CHECK_KEY:
    foreach my $component (keys %check) {
        next CHECK_KEY if $component eq 'esmlog';    # not part of global status
        next CHECK_KEY if $component eq 'alertlog';  # not part of global status

        if ($check{$component} == 0) {  # found something with checking turned off
            $global = 0;
            last CHECK_KEY;
        }
    }

    return;
}
807
#
# Runs omreport with the given parameters (one string) and parses its
# semicolon separated output. Returns a reference to an array of
# anonymous hashes, one per data line, keyed by the column names of
# the most recent header line. Lines starting with "Error" are
# reported with level UNKNOWN unless they are known to be harmless.
#
sub run_omreport {
    my $command = shift;
    my @records = ();  # parsed data lines
    my @columns = ();  # column names from the latest header line

    # Errors that are OK. Some low-end poweredge (and blades) models
    # don't have RAID controllers, intrusion detection sensor, or
    # redundant/instrumented power supplies etc.
    my $ok_errors
      = qr{
              Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem  # No intrusion probe
            | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
            | No\scontrollers\sfound                                     # No RAID controller
            | No\sbattery\sprobes\sfound\son\sthis\ssystem               # No battery probes
            | Invalid\scommand:\spwrmonitoring                           # Older OMSAs lack this command(?)
          }xms;

    # Errors that are OK on blade servers
    my $ok_blade_errors
      = qr{
              No\sfan\sprobes\sfound\son\sthis\ssystem  # No fan probes
          }xms;

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
    if (!defined $rawtext) {
        return [];
    }

    # Workaround for Openmanage BUG introduced in OMSA 5.5.0
    if ($command eq 'storage controller') {
        $rawtext =~ s/\n;/;/gxms;
    }

    # Parse output, store in array
    LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        if ($line =~ m/\A Error/xms) {
            my $harmless = $line =~ m{$ok_errors}xms
              || ($blade && $line =~ m{$ok_blade_errors}xms);
            next LINE if $harmless;
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than 3 fields
        next LINE if $line !~ m/(.*?;){2}/xms;

        my @fields = split /;/xms, $line;

        # A header line replaces the current column names
        if ($fields[0] =~ m/\A (Index|ID|Severity) \z/xms) {
            @columns = @fields;
            next LINE;
        }

        # A data line: pair each column name with the field in the
        # same position
        my %record;
        @record{@columns} = @fields[0 .. $#columns];
        push @records, \%record;
    }

    # Finally, return the collected information
    return \@records;
}
866
867
#
# Tells whether a component is blacklisted. Returns 1 if the component
# is blacklisted, 0 otherwise. A component is blacklisted if its name
# is in %blacklist and the list for that name holds either its id or
# the keyword 'ALL'. Takes two arguments:
#   arg1: component name
#   arg2: component id or index
#
sub blacklisted {
    my ($name, $id) = @_;

    my $entries = $blacklist{$name};
    return 0 if !defined $entries;

    my $hits = grep { defined $id and ($_ eq $id or $_ eq 'ALL') } @{ $entries };
    return $hits ? 1 : 0;
}
889
# Converts the NexusID from SNMP to our version: a leading backslash
# is dropped and every remaining backslash becomes a colon
sub convert_nexus {
    my $nexus = shift;
    $nexus =~ s{\A \\}{}xms;
    $nexus =~ tr{\\}{:};
    return $nexus;
}
897
# Sets custom temperature thresholds based on user supplied options.
# Takes one argument, the type of threshold: 'w' reads the --warning
# option, anything else reads the --critical option. Each option value
# is either a threshold string or the name of a file whose first line
# holds such a string. A threshold string has the form
# "key=max/min,key=max". Returns a reference to a hash that maps each
# key to a hash with the element "max" and, if given, "min". An empty
# hash is returned if a threshold file can't be opened (which is also
# reported with level UNKNOWN).
sub custom_temperature_thresholds {
    my $type   = shift;  # type of threshold, either w (warning) or c (critical)
    my %thres  = ();     # will contain the thresholds
    my @limits = ();     # holds the input

    my @args = $type eq 'w' ? @{ $opt{warning} } : @{ $opt{critical} };

    foreach my $item (@args) {
        my $spec = $item;
        if (-f $item) {
            open my $F, '<', $item
              or do { report('other', "Couldn't open temperature threshold file $item: $!",
                             $E_UNKNOWN) and return {} };
            $spec = <$F>;
            close $F;

            # An empty file has no first line and sets no thresholds
            next if !defined $spec;

            # Without this the last threshold on the line would keep
            # the newline as part of its value
            chomp $spec;
        }
        push @limits, $spec;
    }

    # Parse threshold string, put in hash
    foreach my $spec (@limits) {
        foreach my $t (split m{,}xms, $spec) {
            next if $t !~ m{=}xms;
            my ($key, $val) = split m{=}xms, $t;

            # "key=" leaves the value undefined; it is stored as such,
            # without trying to match against it
            if (defined $val and $val =~ m{/}xms) {
                my ($max, $min) = split m{/}xms, $val;
                $thres{$key}{max} = $max;
                $thres{$key}{min} = $min;
            }
            else {
                $thres{$key}{max} = $val;
            }
        }
    }

    return \%thres;
}
942
943
# Gets the output from SNMP result according to the OIDs checked.
# Takes a reference to a hash of OID => value and a reference to a
# hash of OID (without its last number) => name. The last number of
# each OID, minus one, is used as index into the returned array; each
# array element is a hash of name => value. OIDs whose leading part
# isn't found in the second hash are ignored.
sub get_snmp_output {
    my ($result, $oidref) = @_;
    my @rows = ();

    foreach my $oid (keys %{ $result }) {
        my @parts = split /\./xms, $oid;
        my $row   = pop @parts;
        $row -= 1;

        my $column_oid = join q{.}, @parts;
        next if !exists $oidref->{$column_oid};

        $rows[$row]{ $oidref->{$column_oid} } = $result->{$oid};
    }
    return \@rows;
}
960
961
# Map the controller or other item in-place: every hash in the list
# that doesn't already have the given key gets it, with the given
# value. Hashes that do have the key are left alone. Returns nothing.
sub map_item {
    my ($key, $val, $list) = @_;

    for my $entry (grep { !exists $_->{$key} } @{ $list }) {
        $entry->{$key} = $val;
    }
    return;
}
973
# Return the URL for official Dell documentation for a specific
# PowerEdge server. Takes the system model as its only argument.
sub documentation_url {
    my $model = shift;

    # create model short form, e.g. "r710"
    if ($model =~ m{\A PowerEdge \s (.+) \z}xms) {
        $model = lc $1;
    }

    # special case for blades (e.g. M600, M710), they have common
    # documentation
    if ($model =~ m{\A m\d+ \z}xms) {
        $model = 'm';
    }

    return 'http://support.dell.com/support/edocs/systems/pe' . $model . '/';
}
988
# Return the URL for warranty information for a server with a given
# serial number (servicetag). The country code given with the
# --htmlinfo option selects the regional Dell support site; unknown
# country codes get the global default.
sub warranty_url {
    my $tag = shift;

    # Dell support sites for different parts of the world
    my %site
      = (
         'emea' => 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/',
         'ap'   => 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?',
         'glob' => 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?',
        );

    # EMEA: country code => [ language in the path, language parameter ]
    my %emea
      = (
         'at' => [ 'de', 'de' ],  # Austria
         'be' => [ 'nl', 'nl' ],  # Belgium
         'cz' => [ 'cs', 'cs' ],  # Czech Republic
         'de' => [ 'de', 'de' ],  # Germany
         'dk' => [ 'da', 'da' ],  # Denmark
         'es' => [ 'es', 'es' ],  # Spain
         'fi' => [ 'fi', 'fi' ],  # Finland
         'fr' => [ 'fr', 'fr' ],  # France
         'gr' => [ 'en', 'el' ],  # Greece
         'it' => [ 'it', 'it' ],  # Italy
         'il' => [ 'en', 'en' ],  # Israel
         'me' => [ 'en', 'en' ],  # Middle East
         'no' => [ 'no', 'no' ],  # Norway
         'nl' => [ 'nl', 'nl' ],  # The Netherlands
         'pl' => [ 'pl', 'pl' ],  # Poland
         'pt' => [ 'en', 'pt' ],  # Portugal
         'ru' => [ 'ru', 'ru' ],  # Russia
         'se' => [ 'sv', 'sv' ],  # Sweden
         'uk' => [ 'en', 'en' ],  # United Kingdom
         'za' => [ 'en', 'en' ],  # South Africa
        );

    # America: country code => language parameter
    my %america
      = (
         'br' => 'pt',  # Brazil
         'ca' => 'en',  # Canada
         'mx' => 'es',  # Mexico
         'us' => 'en',  # USA
        );

    # Asia/Pacific: country code => language parameter
    my %asia_pacific
      = (
         'au' => 'en',  # Australia
         'cn' => 'zh',  # China
         'in' => 'en',  # India
        );

    # warranty URLs for different country codes, starting with the
    # default fallback
    my %url = ( 'XX' => $site{glob} . 'ServiceTag=' );

    foreach my $cc (keys %emea) {
        my ($path, $lang) = @{ $emea{$cc} };
        $url{$cc} = $site{emea} . "$path/details?c=$cc&l=$lang&ServiceTag=";
    }
    foreach my $cc (keys %america) {
        $url{$cc} = $site{glob} . "c=$cc&l=$america{$cc}&ServiceTag=";
    }
    foreach my $cc (keys %asia_pacific) {
        $url{$cc} = $site{ap} . "c=$cc&l=$asia_pacific{$cc}&ServiceTag=";
    }

    my $country = exists $url{ $opt{htmlinfo} } ? $opt{htmlinfo} : 'XX';
    return $url{$country} . $tag;
}
1046
1047
1048
1049#---------------------------------------------------------------------
1050# Check functions
1051#---------------------------------------------------------------------
1052
1053#-----------------------------------------
1054# Check global health status
1055#-----------------------------------------
sub check_global {
    # Returns the global health status of the server, translated through
    # %status2nagios.  The value starts out as $E_OK and is only replaced
    # when a status has actually been read.
    my $health = $E_OK;

    if ($snmp) {
        #
        # Checks global status, i.e. both storage and chassis
        #
        my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
        if (!defined $result) {
            # Print the error recorded on the session for the failed
            # request and close the session before exiting, the same way
            # the storage checks in this file handle a failed request.
            printf "SNMP ERROR [global]: %s\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }
        $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
    }
    else {
        #
        # NB! This does not check storage, only chassis...
        #
        LINE:
        foreach my $line (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
            # Only semicolon separated data lines are of interest, and
            # the column header line is not one of them
            next LINE if $line !~ m/;/xms;
            next LINE if $line =~ m/\A SEVERITY;COMPONENT/xms;

            # The first field of the "Main System [Chassis]" line is the
            # severity; nothing after that line is needed.
            if ($line =~ m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
                $health = $status2nagios{$1};
                last LINE;
            }
        }
    }

    # Return the status
    return $health;
}
1088
1089
1090#-----------------------------------------
1091# STORAGE: Check controllers
1092#-----------------------------------------
sub check_controllers {
    # Reports the state of every storage controller.  As side effects,
    # each controller id is appended to @controllers and some controller
    # details are stored in %sysinfo.  Returns nothing.
    my @ctrl_rows = ();

    if ($snmp) {
        my %ctrl_oid = (
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.1'  => 'controllerNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.2'  => 'controllerName',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.5'  => 'controllerState',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.8'  => 'controllerFWVersion',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
            '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
        );

        # The whole table is fetched with get_table() to cope with the
        # odd server that has two or more controllers where some of the
        # OIDs are missing on one of them.
        my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
        my $table = $snmp_session->get_table(-baseoid => $controllerTable);

        # Having no controllers at all is not an error
        return if !defined $table;

        @ctrl_rows = @{ get_snmp_output($table, \%ctrl_oid) };
    }
    else {
        @ctrl_rows = @{ run_omreport('storage controller') };
    }

    # Numeric controller state (SNMP) => text
    my %ctrl_state = (
        0 => 'Unknown',
        1 => 'Ready',
        2 => 'Failed',
        3 => 'Online',
        4 => 'Offline',
        6 => 'Degraded',
    );

    CTRL:
    foreach my $row (@ctrl_rows) {
        my ($id, $nexus, $name, $state, $status);
        my ($minfw, $mindr);      # stay undef unless a minimum version is given
        my ($firmware, $driver);

        if ($snmp) {
            $id     = $row->{'controllerNumber'} - 1;
            $name   = $row->{'controllerName'};
            $state  = $ctrl_state{$row->{'controllerState'}};
            $status = $snmp_status{$row->{'controllerComponentStatus'}};

            if (exists $row->{'controllerMinFWVersion'}) {
                $minfw = $row->{'controllerMinFWVersion'};
            }
            if (exists $row->{'controllerMinDriverVersion'}) {
                $mindr = $row->{'controllerMinDriverVersion'};
            }

            $firmware = 'N/A';
            if (exists $row->{controllerFWVersion}) {
                $firmware = $row->{controllerFWVersion};
            }
            $driver = 'N/A';
            if (exists $row->{controllerDriverVersion}) {
                $driver = $row->{controllerDriverVersion};
            }

            $nexus = convert_nexus($row->{controllerNexusID});
        }
        else {
            my $min_fw_col = $row->{'Minimum Required Firmware Version'};
            my $min_dr_col = $row->{'Minimum Required Driver Version'};
            my $fw_col     = $row->{'Firmware Version'};
            my $dr_col     = $row->{'Driver Version'};

            $id     = $row->{ID};
            $name   = $row->{Name};
            $state  = $row->{State};
            $status = $row->{Status};

            # 'Not Applicable' in any of these columns means "no value"
            $minfw    = $min_fw_col ne 'Not Applicable' ? $min_fw_col : undef;
            $mindr    = $min_dr_col ne 'Not Applicable' ? $min_dr_col : undef;
            $firmware = $fw_col     ne 'Not Applicable' ? $fw_col     : 'N/A';
            $driver   = $dr_col     ne 'Not Applicable' ? $dr_col     : 'N/A';

            $nexus = $id;
        }

        # Strip trailing whitespace from the controller name
        $name =~ s{\s+\z}{}xms;

        # The controller is remembered even if it turns out to be
        # blacklisted below
        push @controllers, $id;

        # Storage info collected for later use
        $sysinfo{'controller'}{$id}{'id'}       = $nexus;
        $sysinfo{'controller'}{$id}{'name'}     = $name;
        $sysinfo{'controller'}{$id}{'driver'}   = $driver;
        $sysinfo{'controller'}{$id}{'firmware'} = $firmware;

        next CTRL if blacklisted('ctrl', $nexus);

        # Outdated firmware: a minimum required version is present
        if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
            chomp $firmware;
            my $fw_msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date},
              $id, $name, $firmware;
            report('storage', $fw_msg, $E_WARNING, $nexus);
        }

        # Outdated driver: a minimum required version is present
        if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
            chomp $driver;
            my $drv_msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date},
              $id, $name, $driver;
            report('storage', $drv_msg, $E_WARNING, $nexus);
        }

        # A 'Non-Critical' status together with a minimum firmware or
        # driver version is reported as OK here; the version itself was
        # reported above.
        my $has_min_version = defined $minfw || defined $mindr;
        my $is_ok = $status eq 'Ok'
          || ($status eq 'Non-Critical' && $has_min_version);

        my ($msg, $level);
        if ($is_ok) {
            $msg = sprintf 'Controller %d [%s] is %s',
              $id, $name, $state;
            $level = $E_OK;
        }
        else {
            $msg = sprintf 'Controller %d [%s] needs attention: %s',
              $id, $name, $state;
            $level = $status2nagios{$status};
        }
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1218
1219
1220#-----------------------------------------
1221# STORAGE: Check physical drives
1222#-----------------------------------------
sub check_physical_disks {
    # Reports the state of every physical disk and counts the disks that
    # are not blacklisted in $count{pdisk}.  Does nothing when
    # @controllers is empty.  Returns nothing.
    return if !@controllers;

    my @disks = ();

    if ($snmp) {
        my %pdisk_oid = (
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.1'  => 'arrayDiskNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.2'  => 'arrayDiskName',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.3'  => 'arrayDiskVendor',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.4'  => 'arrayDiskState',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.6'  => 'arrayDiskProductID',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.9'  => 'arrayDiskEnclosureID',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
            '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
            '1.3.6.1.4.1.674.10893.1.20.130.5.1.5'  => 'arrayDiskEnclosureConnectionEnclosureNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.5.1.7'  => 'arrayDiskEnclosureConnectionControllerNumber',
        );
        my $entries = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);

        # Unlike most other storage checks, a failed request is fatal here
        if (!defined $entries) {
            printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @disks = @{ get_snmp_output($entries, \%pdisk_oid) };
    }
    else {
        # One omreport call per controller; map_item() is handed the
        # controller id together with the rows collected so far
        foreach my $c (@controllers) {
            push @disks, @{ run_omreport("storage pdisk controller=$c") };
            map_item('ctrl', $c, \@disks);
        }
    }

    # Numeric disk state (SNMP) => text
    my %pdisk_state = (
        0  => 'Unknown',
        1  => 'Ready',
        2  => 'Failed',
        3  => 'Online',
        4  => 'Offline',
        6  => 'Degraded',
        7  => 'Recovering',
        11 => 'Removed',
        15 => 'Resynching',
        24 => 'Rebuilding',
        25 => 'No Media',
        26 => 'Formatting',
        28 => 'Diagnostics',
        34 => 'Predictive failure',
        35 => 'Initializing',
        39 => 'Foreign',
        40 => 'Clear',
        41 => 'Unsupported',
        53 => 'Incompatible',
    );

    # Computed sizes that are replaced by the marketed disk size
    my %marketed_size = (
        '449GB' => '450GB',    # quick fix for 450GB disks
        '147GB' => '146GB',    # quick fix for 146GB disks
        '299GB' => '300GB',    # quick fix for 300GB disks
    );

    # Check physical disks on each of the controllers
    PDISK:
    foreach my $disk (@disks) {
        my ($id, $nexus, $name, $state, $status, $fpred, $progr, $ctrl);
        my $vendor;      # disk vendor
        my $product;     # product ID
        my $capacity;    # disk length (size) in bytes

        if ($snmp) {
            $name = $disk->{arrayDiskName};

            # The enclosure id is part of the disk id only when the disk
            # name ends in three colon separated numbers
            my @id_parts = ($disk->{arrayDiskChannel});
            if ($name =~ m{.*\d+:\d+:\d+\z}xms) {
                push @id_parts, $disk->{arrayDiskEnclosureID};
            }
            push @id_parts, $disk->{arrayDiskTargetID};
            $id = join q{:}, @id_parts;

            $state  = $pdisk_state{$disk->{arrayDiskState}};
            $status = $snmp_status{$disk->{arrayDiskComponentStatus}};

            # A SMART alert indication of 2 is treated as "failure predicted"
            $fpred = 0;
            if ($disk->{arrayDiskSmartAlertIndication} == 2) {
                $fpred = 1;
            }

            $progr = q{};

            # -1 when the controller number is not available
            $ctrl = -1;
            if (exists $disk->{arrayDiskEnclosureConnectionControllerNumber}) {
                $ctrl = $disk->{arrayDiskEnclosureConnectionControllerNumber} - 1;
            }

            $nexus    = convert_nexus($disk->{arrayDiskNexusID});
            $vendor   = $disk->{arrayDiskVendor};
            $product  = $disk->{arrayDiskProductID};
            $capacity = $disk->{arrayDiskLengthInMB} * 1024**2;
        }
        else {
            $id     = $disk->{'ID'};
            $name   = $disk->{'Name'};
            $state  = $disk->{'State'};
            $status = $disk->{'Status'};

            $fpred = 0;
            if (lc($disk->{'Failure Predicted'}) eq 'yes') {
                $fpred = 1;
            }

            $progr   = " [$disk->{'Progress'}]";
            $ctrl    = $disk->{'ctrl'};
            $nexus   = join q{:}, $disk->{ctrl}, $id;
            $vendor  = $disk->{'Vendor ID'};
            $product = $disk->{'Product ID'};

            # Keep only the byte count found inside the parentheses
            $capacity = $disk->{'Capacity'};
            $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
        }

        next PDISK if blacklisted('pdisk', $nexus);
        $count{pdisk}++;

        # Strip trailing whitespace
        $vendor  =~ s{\s+\z}{}xms;
        $product =~ s{\s+\z}{}xms;

        # Human readable capacity, in decimal units: TB with one decimal
        # once the size reaches 1000 GB (rounded up), whole GB otherwise
        if (ceil($capacity / 1000**3) >= 1000) {
            $capacity = sprintf '%.1fTB', ($capacity / 1000**4);
        }
        else {
            $capacity = sprintf '%.0fGB', ($capacity / 1000**3);
        }
        if (exists $marketed_size{$capacity}) {
            $capacity = $marketed_size{$capacity};
        }

        # First letter of the vendor name as is, the rest in lower case
        $vendor = substr($vendor, 0, 1) . lc substr($vendor, 1);

        # Drop a trailing trademark marker from the vendor name
        $vendor =~ s{\(tm\)\z}{}xms;

        my ($msg, $level);

        # Special case: Failure predicted
        if ($status eq 'Non-Critical' and $fpred) {
            $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
              $name, $vendor, $product, $capacity, $ctrl;
            $level = $E_WARNING;
        }
        # Special case: Rebuilding
        elsif ($state eq 'Rebuilding') {
            $msg = sprintf '%s [%s %s, %s] on ctrl %d is %s%s',
              $name, $vendor, $product, $capacity, $ctrl, $state, $progr;
            $level = $E_WARNING;
        }
        # Any other status than 'Ok'
        elsif ($status ne 'Ok') {
            $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
              $name, $vendor, $product, $capacity, $ctrl, $state;
            $level = $status2nagios{$status};
        }
        # Ok
        else {
            $msg = sprintf '%s [%s] on ctrl %d is %s',
              $name, $capacity, $ctrl, $state;
            $level = $E_OK;
        }
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1384
1385
1386#-----------------------------------------
1387# STORAGE: Check logical drives
1388#-----------------------------------------
sub check_virtual_disks {
    # Reports the state of every logical (virtual) drive and counts the
    # drives that are not blacklisted in $count{vdisk}.  Does nothing
    # when @controllers is empty.  Returns nothing.
    return if !@controllers;

    my @vdisks = ();

    if ($snmp) {
        my %vdisk_oid = (
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.3'  => 'virtualDiskDeviceName',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.4'  => 'virtualDiskState',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.6'  => 'virtualDiskLengthInMB',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
        );
        my $entries = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);

        # Having no logical drives is not an error
        return if !defined $entries;

        @vdisks = @{ get_snmp_output($entries, \%vdisk_oid) };
    }
    else {
        # One omreport call per controller; map_item() is handed the
        # controller id together with the rows collected so far
        foreach my $c (@controllers) {
            push @vdisks, @{ run_omreport("storage vdisk controller=$c") };
            map_item('ctrl', $c, \@vdisks);
        }
    }

    # Numeric logical drive state (SNMP) => text
    my %vdisk_state = (
        0  => 'Unknown',
        1  => 'Ready',
        2  => 'Failed',
        3  => 'Online',
        4  => 'Offline',
        6  => 'Degraded',
        15 => 'Resynching',
        16 => 'Regenerating',
        24 => 'Rebuilding',
        26 => 'Formatting',
        32 => 'Reconstructing',
        35 => 'Initializing',
        36 => 'Background Initialization',
        38 => 'Resynching Paused',
        52 => 'Permanently Degraded',
        54 => 'Degraded Redundancy',
    );

    # Numeric logical drive layout (SNMP) => text
    my %vdisk_layout = (
        1  => 'Concatenated',
        2  => 'RAID-0',
        3  => 'RAID-1',
        7  => 'RAID-5',
        8  => 'RAID-6',
        10 => 'RAID-10',
        12 => 'RAID-50',
        19 => 'Concatenated RAID 1',
        24 => 'RAID-60',
    );

    # Check virtual disks on each of the controllers
    VDISK:
    foreach my $vd (@vdisks) {
        my ($id, $nexus, $dev, $state, $status, $layout, $size, $progr, $ctrl);

        if ($snmp) {
            $id     = $vd->{virtualDiskTargetID};
            $dev    = $vd->{virtualDiskDeviceName};
            $state  = $vdisk_state{$vd->{virtualDiskState}};
            $status = $snmp_status{$vd->{virtualDiskComponentStatus}};
            $layout = $vdisk_layout{$vd->{virtualDiskLayout}};
            $size   = sprintf '%.2f GB', $vd->{virtualDiskLengthInMB} / 1024;
            $progr  = q{};    # can't get this from SNMP(?)
            $nexus  = convert_nexus($vd->{virtualDiskNexusID});

            # The controller id is the part of the nexus id that comes
            # before the colon
            ($ctrl = $nexus) =~ s{\A (\d+):\d+ \z}{$1}xms;
        }
        else {
            $id     = $vd->{ID};
            $dev    = $vd->{'Device Name'};
            $state  = $vd->{State};
            $status = $vd->{Status};
            $layout = $vd->{Layout};
            $progr  = " [$vd->{Progress}]";

            # Cut the size off after the "GB" part
            $size = $vd->{Size};
            $size =~ s{\A (.*GB).* \z}{$1}xms;

            $ctrl  = $vd->{ctrl};
            $nexus = join q{:}, $vd->{ctrl}, $id;
        }

        next VDISK if blacklisted('vdisk', $nexus);
        $count{vdisk}++;

        my ($msg, $level);

        # Special case: Regenerating
        if ($state eq 'Regenerating') {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s},
              $id, $dev, $layout, $size, $ctrl, $state, $progr;
            $level = $E_WARNING;
        }
        # Any other status than 'Ok'
        elsif ($status ne 'Ok') {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s},
              $id, $dev, $layout, $size, $ctrl, $state;
            $level = $status2nagios{$status};
        }
        # Ok
        else {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s},
              $id, $dev, $layout, $size, $ctrl, $state;
            $level = $E_OK;
        }
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1514
1515
1516#-----------------------------------------
1517# STORAGE: Check cache batteries
1518#-----------------------------------------
sub check_cache_battery {
    # Reports the state of the cache battery on each controller.  Does
    # nothing when @controllers is empty.  Returns nothing.
    return if $#controllers == -1;

    my $id     = undef;
    my $nexus  = undef;
    my $state  = undef;
    my $status = undef;
    my $ctrl   = undef;
    my $learn  = undef; # learn state
    my $pred   = undef; # battery's ability to be charged
    my @output = ();

    if ($snmp) {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.4'  => 'batteryState',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.6'  => 'batteryComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.9'  => 'batteryNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
             '1.3.6.1.4.1.674.10893.1.20.130.16.1.5'  => 'batteryConnectionControllerNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No cache battery is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%bat_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage battery controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Numeric battery state (SNMP) => text
    my %bat_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         6  => 'Degraded',
         7  => 'Reconditioning',
         9  => 'High',
         10 => 'Power Low',
         12 => 'Charging',
         21 => 'Missing',
         36 => 'Learning',
        );

    # Numeric learn state (SNMP) => text
    my %bat_learn_state
      = (
         1  => 'Failed',
         2  => 'Active',
         4  => 'Timed out',
         8  => 'Requested',
         16 => 'Idle',
        );

    # Numeric predicted capacity (SNMP) => text
    my %bat_pred_cap
      = (
         1 => 'Failed',  # The battery cannot be charged and needs to be replaced
         2 => 'Ready',   # The battery can be charged to full capacity
         4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
                         # battery cannot be determined until the Learn cycle is complete
        );

    # Check battery on each of the controllers
    BATTERY:
    foreach my $out (@output) {
        if ($snmp) {
            $state  = $bat_state{$out->{batteryState}};
            $status = $snmp_status{$out->{batteryComponentStatus}};
            $learn  = exists $out->{batteryLearnState}
              ? $bat_learn_state{$out->{batteryLearnState}} : undef;
            $pred   = exists $out->{batteryPredictedCapacity}
              ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
            $ctrl   = $out->{batteryConnectionControllerNumber} - 1;
            $nexus  = convert_nexus($out->{batteryNexusID});

            # The battery id is the part of the nexus id after the colon
            $id     = $nexus;
            $id     =~ s{\A \d+:(\d+) \z}{$1}xms;
        }
        else {
            $id     = $out->{'ID'};
            $state  = $out->{'State'};
            $status = $out->{'Status'};
            $learn  = $out->{'Learn State'};
            $pred   = $out->{'Predicted Capacity Status'};
            $ctrl   = $out->{'ctrl'};
            $nexus  = join q{:}, $out->{ctrl}, $id;
        }

        # The learn state and the predicted capacity are optional (they
        # are left undefined above when the SNMP column is absent or its
        # value is not in the lookup table) and are only used in the
        # messages below.  Fall back to a text so that the messages never
        # format an undefined value.
        $learn = 'Unknown' if !defined $learn;
        $pred  = 'Unknown' if !defined $pred;

        next BATTERY if blacklisted('bat', $nexus);

        # Special case: Charging
        if ($state eq 'Charging') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $pred;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Learning (battery learns its capacity)
        elsif ($state eq 'Learning') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $learn;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Power Low (first part of recharge cycle)
        elsif ($state eq 'Power Low') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
              $id, $ctrl, $state;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Degraded and Non-Critical (usually part of recharge cycle)
        elsif ($state eq 'Degraded' && $status eq 'Non-Critical') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $status;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Default
        elsif ($status ne 'Ok') {
            my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
              $id, $ctrl, $state, $status;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf 'Cache battery %d in controller %d is %s',
              $id, $ctrl, $state;
            report('storage', $msg, $E_OK, $nexus);
        }
    }
    return;
}
1656
1657
1658#-----------------------------------------
1659# STORAGE: Check connectors (channels)
1660#-----------------------------------------
sub check_connectors {
    # Reports the state of every connector (channel) on the controllers.
    # Does nothing when @controllers is empty.  Returns nothing.
    return if !@controllers;

    my @channels = ();

    if ($snmp) {
        my %conn_oid = (
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.1'  => 'channelNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.2'  => 'channelName',
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.3'  => 'channelState',
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.8'  => 'channelComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.9'  => 'channelNexusID',
            '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
        );
        my $entries = $snmp_session->get_entries(-columns => [keys %conn_oid]);

        # A failed request is fatal here
        if (!defined $entries) {
            printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @channels = @{ get_snmp_output($entries, \%conn_oid) };
    }
    else {
        # One omreport call per controller; map_item() is handed the
        # controller id together with the rows collected so far
        foreach my $c (@controllers) {
            push @channels, @{ run_omreport("storage connector controller=$c") };
            map_item('ctrl', $c, \@channels);
        }
    }

    # Numeric connector state (SNMP) => text
    my %conn_state = (
        0 => 'Unknown',
        1 => 'Ready',
        2 => 'Failed',
        3 => 'Online',
        4 => 'Offline',
        6 => 'Degraded',
    );

    # Numeric bus type (SNMP) => text
    my %conn_bustype = (
        1 => 'SCSI',
        2 => 'IDE',
        3 => 'Fibre Channel',
        4 => 'SSA',
        6 => 'USB',
        7 => 'SATA',
        8 => 'SAS',
    );

    # Check connectors on each of the controllers
    CHANNEL:
    foreach my $chan (@channels) {
        my ($id, $nexus, $name, $state, $status, $type, $ctrl);

        if ($snmp) {
            $id     = $chan->{channelNumber} - 1;
            $name   = $chan->{channelName};
            $state  = $conn_state{$chan->{channelState}};
            $status = $snmp_status{$chan->{channelComponentStatus}};
            $type   = $conn_bustype{$chan->{channelBusType}};
            $nexus  = convert_nexus($chan->{channelNexusID});

            # The controller id is taken from the nexus id: the first
            # "number:number" pair is replaced by its first number
            ($ctrl = $nexus) =~ s{(\d+):\d+}{$1}xms;
        }
        else {
            $id     = $chan->{'ID'};
            $name   = $chan->{'Name'};
            $state  = $chan->{'State'};
            $status = $chan->{'Status'};
            $type   = $chan->{'Connector Type'};
            $ctrl   = $chan->{ctrl};
            $nexus  = join q{:}, $chan->{ctrl}, $id;
        }

        next CHANNEL if blacklisted('conn', $nexus);

        # The same message is used whatever the status is; only the
        # reported level differs
        my $level = $status2nagios{$status};
        my $msg   = sprintf '%s [%s] on controller %d is %s',
          $name, $type, $ctrl, $state;
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1752
1753
1754#-----------------------------------------
1755# STORAGE: Check enclosures
1756#-----------------------------------------
sub check_enclosures {
    # Reports the state of every storage enclosure.  As side effects,
    # each enclosure is appended to @enclosures (id, controller id and
    # name) and some enclosure details are stored in %sysinfo.  Returns
    # nothing.
    my $id       = undef;
    my $nexus    = undef;
    my $name     = undef;
    my $state    = undef;
    my $status   = undef;
    my $firmware = undef;
    my $ctrl     = undef;
    my @output   = ();

    if ($snmp) {
        my %encl_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.1'  => 'enclosureNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.2'  => 'enclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.4'  => 'enclosureState',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);

        # No enclosures is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%encl_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage enclosure controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Numeric enclosure state (SNMP) => text
    my %encl_state
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

    ENCLOSURE:
    foreach my $out (@output) {
        if ($snmp) {
            $id       = $out->{'enclosureNumber'} - 1;
            $name     = $out->{'enclosureName'};
            $state    = $encl_state{$out->{'enclosureState'}};
            $status   = $snmp_status{$out->{'enclosureComponentStatus'}};
            $firmware = exists $out->{enclosureFirmwareVersion}
              ? $out->{enclosureFirmwareVersion} : 'N/A';
            $nexus    = convert_nexus($out->{enclosureNexusID});

            # The controller id is the part of the nexus id before the
            # first colon
            $ctrl     = $nexus;
            $ctrl     =~ s{\A (\d+):.* \z}{$1}xms;
        }
        else {
            $id       = $out->{ID};
            $name     = $out->{Name};
            $state    = $out->{State};
            $status   = $out->{Status};
            $firmware = $out->{'Firmware Version'} ne 'Not Applicable'
              ? $out->{'Firmware Version'} : 'N/A';
            $nexus    = join q{:}, $out->{ctrl}, $id;
            $ctrl     = $out->{ctrl};
        }

        $name     =~ s{\s+\z}{}xms; # remove trailing whitespace
        $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace

        # Store enclosure data for future use.  The controller id that
        # was worked out above is stored: for omreport data it is the
        # same value as $out->{ctrl}, while the SNMP columns requested
        # above do not include a 'ctrl' entry (presumably leaving it
        # undefined in SNMP mode -- NOTE(review): confirm against
        # get_snmp_output).
        push @enclosures, { 'id'   => $id,
                            'ctrl' => $ctrl,
                            'name' => $name };

        # Collecting some storage info
        $sysinfo{'enclosure'}{$nexus}{'id'}       = $nexus;
        $sysinfo{'enclosure'}{$nexus}{'name'}     = $name;
        $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;

        next ENCLOSURE if blacklisted('encl', $nexus);

        # The same message is used whatever the status is; only the
        # reported level differs
        my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
          $nexus, $name, $ctrl, $state;
        report('storage', $msg, $status2nagios{$status}, $nexus);
    }
    return;
}
1847
1848
1849#-----------------------------------------
1850# STORAGE: Check enclosure fans
1851#-----------------------------------------
sub check_enclosure_fans {
    # Reports the state of every fan in the storage enclosures.  Does
    # nothing when @controllers is empty.  Returns nothing.
    return if !@controllers;

    my @fans = ();

    if ($snmp) {
        my %fan_oid = (
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.1'  => 'fanNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.2'  => 'fanName',
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.4'  => 'fanState',
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
            '1.3.6.1.4.1.674.10893.1.20.130.8.1.4'  => 'fanConnectionEnclosureName',
            '1.3.6.1.4.1.674.10893.1.20.130.8.1.5'  => 'fanConnectionEnclosureNumber',
        );
        my $entries = $snmp_session->get_entries(-columns => [keys %fan_oid]);

        # Having no enclosure fans is not an error
        return if !defined $entries;

        @fans = @{ get_snmp_output($entries, \%fan_oid) };
    }
    else {
        # One omreport call per known enclosure; map_item() is handed
        # the controller id, enclosure id and enclosure name together
        # with the rows collected so far
        foreach my $enc (@enclosures) {
            my $cmd = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans";
            push @fans, @{ run_omreport($cmd) };
            map_item('ctrl', $enc->{ctrl}, \@fans);
            map_item('encl_id', $enc->{id}, \@fans);
            map_item('encl_name', $enc->{name}, \@fans);
        }
    }

    # Numeric fan state (SNMP) => text
    my %fan_state = (
        0  => 'Unknown',
        1  => 'Ready',
        2  => 'Failed',
        3  => 'Online',
        4  => 'Offline',
        6  => 'Degraded',
        21 => 'Missing',
    );

    # Check fans on each of the enclosures
    FAN:
    foreach my $fan (@fans) {
        my ($id, $nexus, $name, $state, $status, $speed, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $fan->{fanNumber} - 1;
            $name      = $fan->{fanName};
            $state     = $fan_state{$fan->{fanState}};
            $status    = $snmp_status{$fan->{fanComponentStatus}};
            $speed     = $fan->{fanProbeCurrValue};
            $encl_id   = $fan->{fanConnectionEnclosureNumber} - 1;
            $encl_name = $fan->{fanConnectionEnclosureName};
            $nexus     = convert_nexus($fan->{fanNexusID});
        }
        else {
            $id        = $fan->{'ID'};
            $name      = $fan->{'Name'};
            $state     = $fan->{'State'};
            $status    = $fan->{'Status'};
            $speed     = $fan->{'Speed'};
            $encl_name = $fan->{encl_name};

            # controller:enclosure and controller:enclosure:fan
            $encl_id = join q{:}, @{$fan}{qw(ctrl encl_id)};
            $nexus   = join q{:}, @{$fan}{qw(ctrl encl_id)}, $id;
        }

        next FAN if blacklisted('encl_fan', $nexus);

        my ($msg, $level);

        # Ok; this is the only message that includes the fan speed
        if ($status eq 'Ok') {
            $msg = sprintf '%s in enclosure %s [%s] is %s (speed=%s)',
              $name, $encl_id, $encl_name, $state, $speed;
            $level = $E_OK;
        }
        # Any other status
        else {
            $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
              $name, $encl_id, $encl_name, $state;
            $level = $status2nagios{$status};
        }
        report('storage', $msg, $level, $nexus);
    }
    return;
}
1946
1947
1948#-----------------------------------------
1949# STORAGE: Check enclosure power supplies
1950#-----------------------------------------
sub check_enclosure_pwr {
    # Reports the state of every power supply in the storage enclosures.
    # Does nothing when @controllers is empty.  Returns nothing.
    return if !@controllers;

    my @supplies = ();

    if ($snmp) {
        my %ps_oid = (
            '1.3.6.1.4.1.674.10893.1.20.130.9.1.1'  => 'powerSupplyNumber',
            '1.3.6.1.4.1.674.10893.1.20.130.9.1.2'  => 'powerSupplyName',
            '1.3.6.1.4.1.674.10893.1.20.130.9.1.4'  => 'powerSupplyState',
            '1.3.6.1.4.1.674.10893.1.20.130.9.1.9'  => 'powerSupplyComponentStatus',
            '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
            '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
            '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
        );
        my $entries = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # Having no enclosure power supplies is not an error
        return if !defined $entries;

        @supplies = @{ get_snmp_output($entries, \%ps_oid) };
    }
    else {
        # One omreport call per known enclosure; map_item() is handed
        # the controller id, enclosure id and enclosure name together
        # with the rows collected so far
        foreach my $enc (@enclosures) {
            my $cmd = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies";
            push @supplies, @{ run_omreport($cmd) };
            map_item('ctrl', $enc->{ctrl}, \@supplies);
            map_item('encl_id', $enc->{id}, \@supplies);
            map_item('encl_name', $enc->{name}, \@supplies);
        }
    }

    # Numeric power supply state (SNMP) => text
    my %ps_state = (
        0  => 'Unknown',
        1  => 'Ready',
        2  => 'Failed',
        5  => 'Not Installed',
        6  => 'Degraded',
        11 => 'Removed',
        21 => 'Missing',
    );

    # Check power supplies on each of the enclosures
    PS:
    foreach my $ps (@supplies) {
        my ($id, $nexus, $name, $state, $status, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $ps->{powerSupplyNumber};
            $name      = $ps->{powerSupplyName};
            $state     = $ps_state{$ps->{powerSupplyState}};
            $status    = $snmp_status{$ps->{powerSupplyComponentStatus}};
            $encl_id   = $ps->{powerSupplyConnectionEnclosureNumber} - 1;
            $encl_name = $ps->{powerSupplyConnectionEnclosureName};
            $nexus     = convert_nexus($ps->{powerSupplyNexusID});
        }
        else {
            $id        = $ps->{'ID'};
            $name      = $ps->{'Name'};
            $state     = $ps->{'State'};
            $status    = $ps->{'Status'};
            $encl_name = $ps->{encl_name};

            # controller:enclosure and controller:enclosure:power supply
            $encl_id = join q{:}, @{$ps}{qw(ctrl encl_id)};
            $nexus   = join q{:}, @{$ps}{qw(ctrl encl_id)}, $id;
        }

        next PS if blacklisted('encl_ps', $nexus);

        my ($msg, $level);

        # Ok
        if ($status eq 'Ok') {
            $msg = sprintf '%s in enclosure %s [%s] is %s',
              $name, $encl_id, $encl_name, $state;
            $level = $E_OK;
        }
        # Any other status
        else {
            $msg = sprintf '%s in enclosure %s [%s] needs attention: %s',
              $name, $encl_id, $encl_name, $state;
            $level = $status2nagios{$status};
        }
        report('storage', $msg, $level, $nexus);
    }
    return;
}
2040
2041
2042#-----------------------------------------
2043# STORAGE: Check enclosure temperatures
2044#-----------------------------------------
sub check_enclosure_temp {
    # Report every enclosure temperature probe to the 'storage' report
    # and, when perfdata is enabled, store its reading in %perfdata.
    # Probe data is read via SNMP when $snmp is set, otherwise from one
    # 'omreport storage enclosure ... info=temps' run per entry in
    # @enclosures. Takes no arguments and returns nothing.

    # Nothing to check when the controller list is empty
    return if $#controllers == -1;

    my $id        = undef;
    my $nexus     = undef;
    my $name      = undef;
    my $state     = undef;
    my $status    = undef;
    my $reading   = undef;
    my $max_warn  = undef;
    my $max_crit  = undef;
    my $encl_id   = undef;
    my $encl_name = undef;
    my @output    = ();

    if ($snmp) {
        # Note: temperatureProbeUnit is still requested, but its value
        # is not used by this check.
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.1'  => 'temperatureProbeNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.2'  => 'temperatureProbeName',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.4'  => 'temperatureProbeState',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.6'  => 'temperatureProbeUnit',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.9'  => 'temperatureProbeMaxWarning',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.4'  => 'temperatureConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.5'  => 'temperatureConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # No enclosure temperature probes is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
            # Tag the collected records with the enclosure they came from
            map_item('ctrl', $enc->{ctrl}, \@output);
            map_item('encl_id', $enc->{id}, \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Translation of the numeric temperatureProbeState SNMP value
    my %temp_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         4  => 'Offline',
         6  => 'Degraded',
         9  => 'Inactive',
         21 => 'Missing',
        );

    # Check temperature probes on each of the enclosures
    TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            $id        = $out->{temperatureProbeNumber} - 1;
            $name      = $out->{temperatureProbeName};
            $state     = $temp_state{$out->{temperatureProbeState}};
            $status    = $snmp_status{$out->{temperatureProbeComponentStatus}};
            $reading   = $out->{temperatureProbeCurValue};
            $max_warn  = $out->{temperatureProbeMaxWarning};
            $max_crit  = $out->{temperatureProbeMaxCritical};
            $encl_id   = $out->{temperatureConnectionEnclosureNumber} - 1;
            $encl_name = $out->{temperatureConnectionEnclosureName};
            $nexus     = convert_nexus($out->{temperatureProbeNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            # Strip the unit suffix so that only the number remains
            $reading   = $out->{'Reading'};                   $reading  =~ s{\s*C}{}xms;
            $max_warn  = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\s*C}{}xms;
            $max_crit  = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\s*C}{}xms;
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next TEMP if blacklisted('encl_temp', $nexus);

        # Default
        if ($status ne 'Ok') {
            # The unit belongs to the reading, not to the state text
            my $msg = sprintf '%s in enclosure %s [%s] is %s at %s C (%s max)',
              $name, $encl_id, $encl_name, $state, $reading, $max_crit;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)',
              $name, $encl_id, $encl_name, $reading, $max_crit;
            report('storage', $msg, $E_OK, $nexus);
        }

        # Collect performance data
        if (defined $opt{perfdata}) {
            # Shorten "Temperature Probe N" to "temp_N" for the label
            $name =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
            my $pkey = "enclosure_${encl_id}_${name}";
            my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
            $perfdata{$pkey} = $pval;
        }
    }
    return;
}
2158
2159
2160#-----------------------------------------
2161# STORAGE: Check enclosure management modules (EMM)
2162#-----------------------------------------
sub check_enclosure_emms {
    # Report the state of every enclosure management module (EMM) to
    # the 'storage' report. Module data comes from SNMP when $snmp is
    # set, otherwise from 'omreport storage enclosure ... info=emms'
    # for each entry in @enclosures. No arguments, no return value.
    return if scalar @controllers == 0;

    my @modules = ();

    if ($snmp) {
        my %emms_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.1'  => 'enclosureManagementModuleNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.2'  => 'enclosureManagementModuleName',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.4'  => 'enclosureManagementModuleState',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.4'  => 'enclosureManagementModuleConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.5'  => 'enclosureManagementModuleConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %emms_oid]);

        # An undefined result is treated as "no EMMs", which is fine
        return if !defined $result;

        @modules = @{ get_snmp_output($result, \%emms_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            my $query = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms";
            push @modules, @{ run_omreport($query) };
            map_item('ctrl', $enc->{ctrl}, \@modules);
            map_item('encl_id', $enc->{id}, \@modules);
            map_item('encl_name', $enc->{name}, \@modules);
        }
    }

    # Text for each numeric enclosureManagementModuleState value
    my %emms_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         5  => 'Not Installed',
         6  => 'Degraded',
         21 => 'Missing',
        );

    # Check the management modules on each of the enclosures
    EMM:
    foreach my $emm (@modules) {
        my ($id, $name, $state, $status, $encl_id, $encl_name, $nexus);

        if ($snmp) {
            $id        = $emm->{enclosureManagementModuleNumber} - 1;
            $name      = $emm->{enclosureManagementModuleName};
            $state     = $emms_state{$emm->{enclosureManagementModuleState}};
            $status    = $snmp_status{$emm->{enclosureManagementModuleComponentStatus}};
            $encl_id   = $emm->{enclosureManagementModuleConnectionEnclosureNumber} - 1;
            $encl_name = $emm->{enclosureManagementModuleConnectionEnclosureName};
            $nexus     = convert_nexus($emm->{enclosureManagementModuleNexusID});
        }
        else {
            $id        = $emm->{'ID'};
            $name      = $emm->{'Name'};
            $state     = $emm->{'State'};
            $status    = $emm->{'Status'};
            $encl_id   = join q{:}, $emm->{ctrl}, $emm->{'encl_id'};
            $encl_name = $emm->{encl_name};
            $nexus     = join q{:}, $emm->{ctrl}, $emm->{'encl_id'}, $id;
        }

        next EMM if blacklisted('encl_emm', $nexus);

        # Pick message template and alert level from the status
        my ($template, $level)
          = $status ne 'Ok'
          ? ('%s in enclosure %s [%s] needs attention: %s', $status2nagios{$status})
          : ('%s in enclosure %s [%s] is %s', $E_OK);

        my $msg = sprintf $template, $name, $encl_id, $encl_name, $state;
        report('storage', $msg, $level, $nexus);
    }
    return;
}
2253
2254
2255#-----------------------------------------
2256# CHASSIS: Check memory modules
2257#-----------------------------------------
sub check_memory {
    # Report the status of every populated memory module to the
    # 'chassis' report and count the modules in $count{dimm}.
    # Data comes from SNMP when $snmp is set (the plugin prints an
    # error and exits with $E_UNKNOWN if that query fails), otherwise
    # from 'omreport <chassis> memory'. No arguments, no return value.
    my $index    = undef;
    my $status   = undef;
    my $location = undef;
    my $size     = undef;
    my $modes    = undef;
    my @failures = ();
    my @output   = ();

    if ($snmp) {
        my %dimm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1'  => 'memoryDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1'  => 'memoryDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1'  => 'memoryDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%dimm_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis memory") };
    }

    # Note: These values are bit masks, so combination values are
    # possible. If value is 0 (zero), memory device has no faults.
    my %failure_mode
      = (
         1  => 'ECC single bit correction warning rate exceeded',
         2  => 'ECC single bit correction failure rate exceeded',
         4  => 'ECC multibit fault encountered',
         8  => 'ECC single bit correction logging disabled',
         16 => 'device disabled because of spare activation',
        );

    # The masks are numbers: sort them numerically (a plain sort would
    # give 1, 16, 2, 4, 8) so failures are listed lowest bit first.
    my @failure_masks = sort { $a <=> $b } keys %failure_mode;

    DIMM:
    foreach my $out (@output) {
        @failures = ();  # Initialize
        if ($snmp) {
            $index    = $out->{memoryDeviceIndex};
            $status   = $snmp_status{$out->{memoryDeviceStatus}};
            $location = $out->{memoryDeviceLocationName};
            $size     = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
            $modes    = $out->{memoryDeviceFailureModes};
            if ($modes > 0) {
                foreach my $mask (@failure_masks) {
                    if (($modes & $mask) != 0) { push @failures, $failure_mode{$mask}; }
                }
            }
        }
        else {
            # Unoccupied slots get an undefined index and are skipped below
            $index    = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
            $status   = $out->{'Status'};
            $location = $out->{'Connector Name'};
            $size     = $out->{'Size'};
            if (defined $size) {
                $size =~ s{\s\s}{ }gxms;
            }
            # Run 'omreport chassis memory index=X' to get the failures
            if ($status ne 'Ok' && defined $index) {
                foreach (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
                    if (m/\A Failures; (.+?) \z/xms) {
                        chop(my $fail = $1);
                        push @failures, split m{\.}xms, $fail;
                    }
                }
            }
        }
        # Trim leading and trailing whitespace from the location
        $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;

        # Ignore empty memory slots. This is tested before the blacklist
        # lookup so that blacklisted() never receives an undefined index.
        next DIMM if !defined $index;

        next DIMM if blacklisted('dimm', $index);
        $count{dimm}++;

        if ($status ne 'Ok') {
            my $msg = undef;
            if (scalar @failures == 0) {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
                  $index, $location, $size, $status;
            }
            else {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
                  $index, $location, $size, (join q{, }, @failures);
            }

            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'Memory module %d [%s, %s] is %s',
              $index, $location, $size, $status;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2364
2365
2366#-----------------------------------------
2367# CHASSIS: Check fans
2368#-----------------------------------------
sub check_fans {
    # Report each chassis fan to the 'chassis' report, count it in
    # $count{fan} and, when perfdata is enabled, record its reading in
    # %perfdata. Fan data comes from SNMP when $snmp is set, otherwise
    # from 'omreport <chassis> fans'. Returns 0 when $blade is set and
    # the SNMP query gave no result; otherwise returns nothing.
    my @fans = ();

    if ($snmp) {
        my %cool_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.12.1.2.1'  => 'coolingDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.700.12.1.5.1'  => 'coolingDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.700.12.1.6.1'  => 'coolingDeviceReading',
             '1.3.6.1.4.1.674.10892.1.700.12.1.8.1'  => 'coolingDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %cool_oid]);

        if (!defined $result) {
            # With $blade set a missing result is accepted silently
            return 0 if $blade;

            printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @fans = @{ get_snmp_output($result, \%cool_oid) };
    }
    else {
        @fans = @{ run_omreport("$omopt_chassis fans") };
    }

    FAN:
    foreach my $fan (@fans) {
        my ($index, $status, $reading, $location, $max_crit, $max_warn);

        if ($snmp) {
            $index    = $fan->{coolingDeviceIndex};
            $status   = $snmp_probestatus{$fan->{coolingDeviceStatus}};
            $reading  = $fan->{coolingDeviceReading};
            $location = $fan->{coolingDeviceLocationName};

            # Thresholds default to 0 when the agent did not supply them
            $max_crit = 0;
            if (exists $fan->{coolingDeviceUpperCriticalThreshold}) {
                $max_crit = $fan->{coolingDeviceUpperCriticalThreshold};
            }
            $max_warn = 0;
            if (exists $fan->{coolingDeviceUpperNonCriticalThreshold}) {
                $max_warn = $fan->{coolingDeviceUpperNonCriticalThreshold};
            }
        }
        else {
            $index    = $fan->{'Index'};
            $status   = $fan->{'Status'};
            $reading  = $fan->{'Reading'};
            $location = $fan->{'Probe Name'};

            # '[N/A]' thresholds are replaced by 0
            $max_crit = $fan->{'Maximum Failure Threshold'};
            $max_crit = 0 if $max_crit eq '[N/A]';
            $max_warn = $fan->{'Maximum Warning Threshold'};
            $max_warn = 0 if $max_warn eq '[N/A]';

            # Keep only the leading digits of each value
            foreach my $value ($reading, $max_warn, $max_crit) {
                $value =~ s{\A (\d+).* \z}{$1}xms;
            }
        }

        next FAN if blacklisted('fan', $index);
        $count{fan}++;

        if ($status eq 'Ok') {
            my $msg = sprintf 'Chassis fan %d [%s]: %s',
              $index, $location, $reading;
            report('chassis', $msg, $E_OK, $index);
        }
        else {
            my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
              $index, $location, $status;
            # SNMP and omreport status texts use different lookup tables
            my $err = $status2nagios{$status};
            if ($snmp) {
                $err = $probestatus2nagios{$status};
            }
            report('chassis', $msg, $err, $index);
        }

        # Collect performance data
        if (defined $opt{perfdata}) {
            my $label = lc $location;
            $label =~ s{\s}{_}gxms;
            $label =~ s{proc_}{cpu#}xms;
            $perfdata{ join q{_}, 'fan', $index, $label }
              = join q{;}, "${reading}RPM", $max_warn, $max_crit;
        }
    }
    return;
}
2458
2459
2460#-----------------------------------------
2461# CHASSIS: Check power supplies
2462#-----------------------------------------
sub check_powersupplies {
    # Report each chassis power supply to the 'chassis' report and
    # count it in $count{power}. Data comes from SNMP when $snmp is
    # set, otherwise from 'omreport <chassis> pwrsupplies'.
    # Returns 0 when the SNMP query gives no result; otherwise returns
    # nothing.
    my $index    = undef;
    my $status   = undef;
    my $type     = undef;
    my $err_type = undef;
    my $state    = undef;
    my @states   = ();
    my @output   = ();

    if ($snmp) {
        my %ps_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.12.1.2.1'  => 'powerSupplyIndex',
             '1.3.6.1.4.1.674.10892.1.600.12.1.5.1'  => 'powerSupplyStatus',
             '1.3.6.1.4.1.674.10892.1.600.12.1.7.1'  => 'powerSupplyType',
             '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
             '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # No instrumented PSU is OK (blades, low-end servers)
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%ps_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
    }

    # Text for each numeric powerSupplyType value
    my %ps_type
      = (
         1  => 'Other',
         2  => 'Unknown',
         3  => 'Linear',
         4  => 'Switching',
         5  => 'Battery',
         6  => 'Uninterruptible Power Supply',
         7  => 'Converter',
         8  => 'Regulator',
         9  => 'AC',
         10 => 'DC',
         11 => 'VRM',
        );

    # powerSupplySensorState is tested bit by bit against these masks
    my %ps_state
      = (
         1  => 'Presence detected',
         2  => 'Failure detected',
         4  => 'Predictive Failure',
         8  => 'AC lost',
         16 => 'AC lost or out-of-range',
         32 => 'AC out-of-range but present',
         64 => 'Configuration error',
        );

    # The masks are numbers: sort them numerically (a plain sort would
    # give 1, 16, 2, 32, 4, 64, 8) so states are listed lowest bit first.
    my @state_masks = sort { $a <=> $b } keys %ps_state;

    my %ps_config_error_type
      = (
         1 => 'Vendor mismatch',
         2 => 'Revision mismatch',
         3 => 'Processor missing',
        );

    PS:
    foreach my $out (@output) {
        if ($snmp) {
            @states = ();  # contains states for the PS

            $index    = $out->{powerSupplyIndex} - 1;
            $status   = $snmp_status{$out->{powerSupplyStatus}};
            $type     = $ps_type{$out->{powerSupplyType}};
            $err_type = defined $out->{powerSupplyConfigurationErrorType}
              ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;

            # get the combined state from the StatusReading OID
            foreach my $mask (@state_masks) {
                if (($out->{powerSupplySensorState} & $mask) != 0) {
                    push @states, $ps_state{$mask};
                }
            }

            # If configuration error, also include the error type
            if (defined $err_type) {
                push @states, $err_type;
            }

            # Finally, construct the state string
            $state = join q{, }, @states;
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $type   = $out->{'Type'};
            $state  = $out->{'Online Status'};
        }

        next PS if blacklisted('ps', $index);
        $count{power}++;

        if ($status ne 'Ok') {
            my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
              $index, $type, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        else {
            my $msg = sprintf 'Power Supply %d [%s]: %s',
              $index, $type, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2574
2575
2576#-----------------------------------------
2577# CHASSIS: Check temperatures
2578#-----------------------------------------
sub check_temperatures {
    # Report each chassis temperature probe to the 'chassis' report,
    # count it in $count{temp} and, when perfdata is enabled, record
    # the reading of non-discrete probes in %perfdata.
    #
    # Data comes from SNMP when $snmp is set (the plugin prints an
    # error and exits with $E_UNKNOWN if that query fails), otherwise
    # from 'omreport <chassis> temps'. For non-discrete probes the
    # user's custom thresholds are evaluated first; only when none of
    # them trips are the probe's own status and thresholds used.
    # No arguments, no return value.
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my $max_crit = undef;
    my $max_warn = undef;
    my $min_warn = undef;
    my $min_crit = undef;
    my $type     = undef;
    my $discrete = undef;
    my @output   = ();

    # Getting custom temperature thresholds (user option)
    my %warn_threshold = %{ custom_temperature_thresholds('w') };
    my %crit_threshold = %{ custom_temperature_thresholds('c') };

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.20.1.2.1'  => 'temperatureProbeIndex',
             '1.3.6.1.4.1.674.10892.1.700.20.1.5.1'  => 'temperatureProbeStatus',
             '1.3.6.1.4.1.674.10892.1.700.20.1.6.1'  => 'temperatureProbeReading',
             '1.3.6.1.4.1.674.10892.1.700.20.1.7.1'  => 'temperatureProbeType',
             '1.3.6.1.4.1.674.10892.1.700.20.1.8.1'  => 'temperatureProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
            );
        # this didn't work well for some reason
        #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # Getting values using the table
        my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
        my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis temps") };
    }

    my %probe_type
      = (
         1  => 'Other',      # type is other than following values
         2  => 'Unknown',    # type is unknown
         3  => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
         16 => 'Discrete',   # type is temperature probe with discrete reading
        );

    TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            # Readings and thresholds are divided by 10 before use
            $index    = $out->{temperatureProbeIndex} - 1;
            $status   = $snmp_probestatus{$out->{temperatureProbeStatus}};
            $reading  = $out->{temperatureProbeReading} / 10;
            $location = $out->{temperatureProbeLocationName};
            # A threshold the agent did not return is marked '[N/A]',
            # the placeholder that the comparisons further down test
            # for. Upper thresholds are now guarded like lower ones.
            $max_crit = exists $out->{temperatureProbeUpperCriticalThreshold}
              ? $out->{temperatureProbeUpperCriticalThreshold} / 10 : '[N/A]';
            $max_warn = exists $out->{temperatureProbeUpperNonCriticalThreshold}
              ? $out->{temperatureProbeUpperNonCriticalThreshold} / 10 : '[N/A]';
            $min_crit = exists $out->{temperatureProbeLowerCriticalThreshold}
              ? $out->{temperatureProbeLowerCriticalThreshold} / 10 : '[N/A]';
            $min_warn = exists $out->{temperatureProbeLowerNonCriticalThreshold}
              ? $out->{temperatureProbeLowerNonCriticalThreshold} / 10 : '[N/A]';
            $type     = $probe_type{$out->{temperatureProbeType}};
            $discrete = exists $out->{temperatureProbeDiscreteReading}
              ? $out->{temperatureProbeDiscreteReading} : undef;
        }
        else {
            # Strip the ".0 C" suffix from the reading and thresholds
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
            $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
            $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
            $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;
            # A reading that is not a plain integer is treated as discrete
            $type     = $reading =~ m{\A\d+\z}xms ? 'AmbientESM' : 'Discrete';
            $discrete = $reading;
        }

        next TEMP if blacklisted('temp', $index);
        $count{temp}++;

        if ($type eq 'Discrete') {
            my $msg = sprintf 'Temperature probe %d (%s): is %s',
              $index, $location, $discrete;
            my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
            report('chassis', $msg, $err, $index);
        }
        else {
            # First check according to custom thresholds
            if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
                # Custom critical MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{max};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
                # Custom warning MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{max};
                report('chassis', $msg, $E_WARNING, $index);
            }
            elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
                # Custom critical MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{min};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
                # Custom warning MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{min};
                report('chassis', $msg, $E_WARNING, $index);
            }
            # Then fall back to the probe's own status and thresholds
            elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            # Ok
            else {
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
                  $index, $location, $reading;
                if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
                    $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
                }
                else {
                    $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
                      $min_warn, $min_crit, $max_warn, $max_crit;
                }
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }

            # Collect performance data
            if (defined $opt{perfdata}) {
                my $pname = lc $location;
                $pname =~ s{\s}{_}gxms;
                $pname =~ s{_temp\z}{}xms;
                $pname =~ s{proc_}{cpu#}xms;
                my $pkey = join q{_}, 'temp', $index, $pname;
                # An unavailable threshold is emitted as an empty field
                # rather than the '[N/A]' placeholder text.
                my $pwarn = $max_warn eq '[N/A]' ? q{} : $max_warn;
                my $pcrit = $max_crit eq '[N/A]' ? q{} : $max_crit;
                my $pval = join q{;}, "${reading}C", $pwarn, $pcrit;
                $perfdata{$pkey} = $pval;
            }
        }
    }
    return;
}
2755
2756
2757#-----------------------------------------
2758# CHASSIS: Check processors
2759#-----------------------------------------
sub check_processors {
    # Report each occupied CPU slot to the 'chassis' report and count
    # it in $count{cpu}. Data comes from SNMP when $snmp is set (the
    # plugin prints an error and exits with $E_UNKNOWN if both OID sets
    # fail), otherwise from 'omreport <chassis> processors'.
    # No arguments, no return value.
    my $index   = undef;
    my $status  = undef;
    my $state   = undef;
    my $oid_ver = 'new';
    my @output  = ();

    if ($snmp) {

        # NOTE: For some reason, older models don't have the
        # "Processor Device Status" OIDs. We first check the newer
        # (preferred) OIDs, and if that doesn't work, check the "old"
        # OIDs.

        my %cpu_oid_new  # for newer models
          = (
             '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1' => 'processorDeviceStatusIndex',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1' => 'processorDeviceStatusStatus',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1' => 'processorDeviceStatusReading',
            );

        my %cpu_oid_old  # for older models
          = (
             '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1' => 'processorDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1' => 'processorDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1' => 'processorDeviceStatusState',
            );

        my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_new]);

        if (!defined $result) {
            $oid_ver = 'old';
            $result = $snmp_session->get_entries(-columns => [keys %cpu_oid_old]);
        }

        if (!defined $result) {
            printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        if ($oid_ver eq 'new') {
            @output = @{ get_snmp_output($result, \%cpu_oid_new) };
        }
        else {
            @output = @{ get_snmp_output($result, \%cpu_oid_old) };
        }
    }
    else {
        @output = @{ run_omreport("$omopt_chassis processors") };
    }

    my %cpu_state
      = (
         1 => 'Other',         # other than following values
         2 => 'Unknown',       # unknown
         3 => 'Enabled',       # enabled
         4 => 'User Disabled', # disabled by user via BIOS setup
         5 => 'BIOS Disabled', # disabled by BIOS (POST error)
         6 => 'Idle',          # idle
        );

    # processorDeviceStatusReading is tested bit by bit against these
    my %cpu_reading
      = (
         1    => 'Internal Error',      # Internal Error
         2    => 'Thermal Trip',        # Thermal Trip
         32   => 'Configuration Error', # Configuration Error
         128  => 'Present',             # Processor Present
         256  => 'Disabled',            # Processor Disabled
         512  => 'Terminator Present',  # Terminator Present
         1024 => 'Throttled',           # Processor Throttled
        );

    # The masks are numbers: sort them numerically (a plain sort would
    # give 1, 1024, 128, 2, ...) so states are listed lowest bit first.
    my @reading_masks = sort { $a <=> $b } keys %cpu_reading;

    CPU:
    foreach my $out (@output) {
        if ($snmp) {
            if ($oid_ver eq 'new') {
                my @states = ();  # contains states for the CPU
                $index  = $out->{processorDeviceStatusIndex} - 1;
                $status = $snmp_status{$out->{processorDeviceStatusStatus}};

                # get the combined state from the StatusReading OID
                foreach my $mask (@reading_masks) {
                    if (($out->{processorDeviceStatusReading} & $mask) != 0) {
                        push @states, $cpu_reading{$mask};
                    }
                }

                # Finally, create the state string
                $state = join q{, }, @states;
            }
            else {
                $index  = $out->{processorDeviceIndex} - 1;
                $status = $snmp_status{$out->{processorDeviceStatus}};
                $state  = $cpu_state{$out->{processorDeviceStatusState}};
            }
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $state  = $out->{'State'};
        }

        next CPU if blacklisted('cpu', $index);

        # Ignore unoccupied CPU slots (omreport)
        next CPU if (defined $out->{'Processor Manufacturer'}
                     and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
          or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');

        # Ignore unoccupied CPU slots (snmp)
        if ($snmp and exists $out->{processorDeviceStatusReading}
            and $out->{processorDeviceStatusReading} == 0) {
            next CPU;
        }

        $count{cpu}++;

        # Default
        if ($status ne 'Ok') {
            my $msg = sprintf 'CPU %d needs attention: %s',
              $index, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'CPU %d is %s',
              $index, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2894
2895
2896#-----------------------------------------
2897# CHASSIS: Check voltage probes
2898#-----------------------------------------
sub check_volts {
    # Report each chassis voltage probe to the 'chassis' report and
    # count it in $count{volt}. Data comes from SNMP when $snmp is set
    # (the plugin prints an error and exits with $E_UNKNOWN if that
    # query fails), otherwise from 'omreport <chassis> volts'.
    # No arguments, no return value.
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my @output   = ();

    if ($snmp) {
        my %volt_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.20.1.2.1'  => 'voltageProbeIndex',
             '1.3.6.1.4.1.674.10892.1.600.20.1.5.1'  => 'voltageProbeStatus',
             '1.3.6.1.4.1.674.10892.1.600.20.1.6.1'  => 'voltageProbeReading',
             '1.3.6.1.4.1.674.10892.1.600.20.1.8.1'  => 'voltageProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
            );

        my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
        my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%volt_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis volts") };
    }

    # Text for each numeric voltageProbeDiscreteReading value
    my %volt_discrete_reading
      = (
         1 => 'Good',
         2 => 'Bad',
        );

    VOLT:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{voltageProbeIndex} - 1;
            # The alert level below is looked up in %probestatus2nagios,
            # so the status text must come from %snmp_probestatus, as in
            # the other probe checks (fans, temperatures).
            # NOTE(review): assumes voltageProbeStatus uses the probe
            # status value set - confirm against the Dell MIB.
            $status   = $snmp_probestatus{$out->{voltageProbeStatus}};
            # Use the numeric reading (divided by 1000, shown in volts)
            # when present, otherwise the discrete Good/Bad reading
            $reading  = exists $out->{voltageProbeReading}
              ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
              : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
            $location = $out->{voltageProbeLocationName};
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
        }

        next VOLT if blacklisted('volt', $index);
        $count{volt}++;

        my $msg = sprintf 'Voltage sensor %d [%s] is %s',
          $index, $location, $reading;
        my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
        report('chassis', $msg, $err, $index);
    }
    return;
}
2964
2965
2966#-----------------------------------------
2967# CHASSIS: Check batteries
2968#-----------------------------------------
sub check_batteries {
    # Report the state of every battery probe. Data comes from SNMP when
    # $snmp is set, otherwise from 'omreport <chassis> batteries'.
    # Returns 0 early when the SNMP query yields nothing (no batteries
    # is not treated as an error); otherwise returns nothing.
    my @batteries;

    if (!$snmp) {
        @batteries = @{ run_omreport("$omopt_chassis batteries") };
    }
    else {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
             '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
             '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
             '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No batteries is OK
        return 0 if !defined $entries;

        @batteries = @{ get_snmp_output($entries, \%bat_oid) };
    }

    # Text for the numeric SNMP battery reading
    my %bat_reading
      = (
         1 => 'Predictive Failure',
         2 => 'Failed',
         4 => 'Presence Detected',
        );

    BATTERY:
    foreach my $bat (@batteries) {
        my ($index, $status, $reading, $location);

        if ($snmp) {
            # The SNMP index is shifted down by one before it is used
            $index    = $bat->{batteryIndex} - 1;
            $status   = $snmp_status{ $bat->{batteryStatus} };
            $reading  = $bat_reading{ $bat->{batteryReading} };
            $location = $bat->{batteryLocationName};
        }
        else {
            ($index, $status, $reading, $location)
              = @{$bat}{ 'Index', 'Status', 'Reading', 'Probe Name' };
        }

        # Note: the blacklist keyword is 'bp', the counter key is 'bat'
        next BATTERY if blacklisted('bp', $index);
        $count{bat}++;

        my $msg = sprintf 'Battery probe %d [%s] is %s',
          $index, $location, $reading;
        report('chassis', $msg, $status2nagios{$status}, $index);
    }

    return;
}
3026
3027
3028#-----------------------------------------
3029# CHASSIS: Check amperage probes (power monitoring)
3030#-----------------------------------------
sub check_pwrmonitoring {
    # Report the state of every amperage (power monitoring) probe and,
    # when $opt{perfdata} is defined, store performance data in %perfdata.
    # Data comes from SNMP when $snmp is set, otherwise from
    # 'omreport <chassis> pwrmonitoring'. Returns 0 early when the SNMP
    # query yields nothing; otherwise returns nothing.
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my $max_crit = undef;
    my $max_warn = undef;
    my $unit     = undef;
    my @output   = ();

    if ($snmp) {
        my %amp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.30.1.2.1'  => 'amperageProbeIndex',
             '1.3.6.1.4.1.674.10892.1.600.30.1.5.1'  => 'amperageProbeStatus',
             '1.3.6.1.4.1.674.10892.1.600.30.1.6.1'  => 'amperageProbeReading',
             '1.3.6.1.4.1.674.10892.1.600.30.1.7.1'  => 'amperageProbeType',
             '1.3.6.1.4.1.674.10892.1.600.30.1.8.1'  => 'amperageProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %amp_oid]);

        # No pwrmonitoring is OK
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%amp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis pwrmonitoring") };
    }

    my %amp_type   # Amperage probe types
      = (
         1  => 'amperageProbeTypeIsOther',            # other than following values
         2  => 'amperageProbeTypeIsUnknown',          # unknown
         3  => 'amperageProbeTypeIs1Point5Volt',      # 1.5 amperage probe
         4  => 'amperageProbeTypeIs3Point3volt',      # 3.3 amperage probe
         5  => 'amperageProbeTypeIs5Volt',            # 5 amperage probe
         6  => 'amperageProbeTypeIsMinus5Volt',       # -5 amperage probe
         7  => 'amperageProbeTypeIs12Volt',           # 12 amperage probe
         8  => 'amperageProbeTypeIsMinus12Volt',      # -12 amperage probe
         9  => 'amperageProbeTypeIsIO',               # I/O probe
         10 => 'amperageProbeTypeIsCore',             # Core probe
         11 => 'amperageProbeTypeIsFLEA',             # FLEA (standby) probe
         12 => 'amperageProbeTypeIsBattery',          # Battery probe
         13 => 'amperageProbeTypeIsTerminator',       # SCSI Termination probe
         14 => 'amperageProbeTypeIs2Point5Volt',      # 2.5 amperage probe
         15 => 'amperageProbeTypeIsGTL',              # GTL (ground termination logic) probe
         16 => 'amperageProbeTypeIsDiscrete',         # amperage probe with discrete reading
         23 => 'amperageProbeTypeIsPowerSupplyAmps',  # Power Supply probe with reading in Amps
         24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts
         25 => 'amperageProbeTypeIsSystemAmps',       # System probe with reading in Amps
         26 => 'amperageProbeTypeIsSystemWatts',      # System probe with reading in Watts
        );

    my %amp_discrete
      = (
         1 => 'Good',
         2 => 'Bad',
        );

    my %amp_unit
      = (
         'amperageProbeTypeIsPowerSupplyAmps'  => 'hA',  # tenths of Amps
         'amperageProbeTypeIsSystemAmps'       => 'hA',  # tenths of Amps
         'amperageProbeTypeIsPowerSupplyWatts' => 'W',   # Watts
         'amperageProbeTypeIsSystemWatts'      => 'W',   # Watts
         'amperageProbeTypeIsDiscrete'         => q{},   # discrete reading, no unit
        );

    AMP:
    foreach my $out (@output) {
        if ($snmp) {
            # Resolve the probe type once. An unmapped or missing type
            # becomes the empty string, so it is handled like any other
            # non-discrete type (unit 'mA') without comparing undef.
            my $type = defined $out->{amperageProbeType}
              ? $amp_type{ $out->{amperageProbeType} } : undef;
            $type = q{} if !defined $type;

            $index    = $out->{amperageProbeIndex} - 1;
            $status   = $snmp_status{$out->{amperageProbeStatus}};
            $reading  = $type eq 'amperageProbeTypeIsDiscrete'
              ? $amp_discrete{$out->{amperageProbeDiscreteReading}}
              : $out->{amperageProbeReading};
            $location = $out->{amperageProbeLocationName};
            $max_crit = exists $out->{amperageProbeUpperCriticalThreshold}
              ? $out->{amperageProbeUpperCriticalThreshold} : 0;
            $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold}
              ? $out->{amperageProbeUpperNonCriticalThreshold} : 0;
            $unit     = exists $amp_unit{$type} ? $amp_unit{$type} : 'mA';

            # Readings in tenths of Amps are converted to whole Amps
            if ($unit eq 'hA') {
                $reading  /= 10;
                $max_crit /= 10;
                $max_warn /= 10;
                $unit      = 'A';
            }
        }
        else {
            $index    = $out->{'Index'};
            next AMP if $index !~ m/^\d+$/x;
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Failure Threshold'} ne '[N/A]'
              ? $out->{'Failure Threshold'} : 0;
            $max_warn = $out->{'Warning Threshold'} ne '[N/A]'
              ? $out->{'Warning Threshold'} : 0;

            # Split "<number> <unit>" into its two parts. The unit is
            # taken from the capture only when the substitution really
            # matched; otherwise $2 would be undefined or stale.
            if ($reading =~ s{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}{$1}xms) {
                $unit = $2;
            }
            else {
                $unit = q{};
            }
            $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
            $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms;
        }

        next AMP if blacklisted('pm', $index);
        next AMP if $index !~ m{\A \d+ \z}xms;
        $count{amp}++;

        # Exactly one argument per conversion in the format string
        my $msg = sprintf 'Amperage probe %d [%s] reads %s %s',
          $index, $location, $reading, $unit;
        report('chassis', $msg, $status2nagios{$status}, $index);

        # Collect performance data
        if (defined $opt{perfdata}) {
            next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number)
            my $pname = lc $location;
            $pname =~ s{\s}{_}gxms;
            my $pkey = join q{_}, 'pwr_mon', $index, $pname;
            my $pval = join q{;}, "$reading$unit", $max_warn, $max_crit;
            $perfdata{$pkey} = $pval;
        }
    }

    # Collect EXTRA performance data not found at first run. This is a
    # rather ugly hack: the ssv output is scanned for the section that
    # follows a 'Location;Reading' header line, and every reading found
    # there is stored under the next pwr_mon index not already in use.
    if (defined $opt{perfdata} && !$snmp) {
        my $found      = 0;
        my $next_index = 0;
        my %used       = ();

        # find used indexes
        foreach my $key (keys %perfdata) {
            if ($key =~ m/\A pwr_mon_(\d+)/xms) {
                $used{$1} = 1;
            }
        }

        AMP2:
        foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) {
            # NOTE(review): chop drops the last character unconditionally;
            # presumably every line ends with a newline - confirm against
            # run_command before switching to chomp.
            chop $line;
            if ($line eq 'Location;Reading') {
                $found = 1;
                next AMP2;
            }
            if ($line eq q{}) {
                $found = 0;
                next AMP2;
            }
            if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) {
                my $aname = lc $1;
                my $aval  = $2;
                my $aunit = $3;
                $aname =~ s{\s}{_}gxms;

                # don't use an existing index
                while (exists $used{$next_index}) { ++$next_index; }

                $perfdata{"pwr_mon_${next_index}_${aname}"} = "$aval$aunit;0;0";
                ++$next_index;
            }
        }
    }

    return;
}
3202
3203
3204#-----------------------------------------
3205# CHASSIS: Check intrusion
3206#-----------------------------------------
sub check_intrusion {
    # Report the state of every chassis intrusion sensor. Data comes from
    # SNMP when $snmp is set, otherwise from 'omreport <chassis> intrusion'.
    # Returns 0 early when the SNMP query yields nothing; otherwise
    # returns nothing.
    my @sensors;

    if (!$snmp) {
        @sensors = @{ run_omreport("$omopt_chassis intrusion") };
    }
    else {
        my %int_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
             '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
             '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %int_oid]);

        # No intrusion is OK
        return 0 if !defined $entries;

        @sensors = @{ get_snmp_output($entries, \%int_oid) };
    }

    my %int_reading
      = (
         1 => 'Not Breached',          # chassis not breached and no uncleared breaches
         2 => 'Breached',              # chassis currently breached
         3 => 'Breached Prior',        # chassis breached prior to boot and has not been cleared
         4 => 'Breach Sensor Failure', # intrusion sensor has failed
        );

    INTRUSION:
    foreach my $sensor (@sensors) {
        my ($index, $status, $reading)
          = $snmp
          ? ( $sensor->{intrusionIndex} - 1,
              $snmp_status{ $sensor->{intrusionStatus} },
              $int_reading{ $sensor->{intrusionReading} } )
          : @{$sensor}{ 'Index', 'Status', 'State' };

        next INTRUSION if blacklisted('intr', $index);
        $count{intr}++;

        # Ok
        if ($status eq 'Ok') {
            my $msg = sprintf 'Chassis intrusion %d detection: %s (%s)',
              $index, $status, $reading;
            report('chassis', $msg, $E_OK, $index);
            next INTRUSION;
        }

        # Any other status is reported as a warning, whatever its value
        my $msg = sprintf 'Chassis intrusion %d detected: %s',
          $index, $reading;
        report('chassis', $msg, $E_WARNING, $index);
    }

    return;
}
3269
3270
3271#-----------------------------------------
3272# CHASSIS: Check alert log
3273#-----------------------------------------
sub check_alertlog {
    # Tally the alert log entries by severity in $count{alert} and
    # report one summary line. Does nothing in SNMP mode.
    return if $snmp; # Not supported with SNMP

    # One counter per value of the 'Severity' field
    ++$count{alert}{ $_->{Severity} }
      for @{ run_omreport("$omopt_system alertlog") };

    # Critical entries outrank non-critical ones; otherwise level 0
    my $err
      = $count{alert}{'Critical'}     > 0 ? $E_CRITICAL
      : $count{alert}{'Non-Critical'} > 0 ? $E_WARNING
      :                                     0;

    my $msg = sprintf 'Alert log content: %d critical, %d non-critical, %d ok',
      $count{alert}{'Critical'}, $count{alert}{'Non-Critical'}, $count{alert}{'Ok'};
    report('other', $msg, $err);

    return;
}
3293
3294#-----------------------------------------
3295# CHASSIS: Check ESM log overall health
3296#-----------------------------------------
sub check_esmlog_health {
    # Report the overall health of the ESM log. The health value is read
    # from a single SNMP OID when $snmp is set, otherwise from the
    # 'Health;...' line of 'omreport <system> esmlog -fmt ssv'. If no
    # such line is found the health stays at its default 'Ok'.
    my $health = 'Ok';

    if ($snmp) {
        my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
        if (!defined $result) {
            my $msg = sprintf 'SNMP ERROR [esmhealth]: %s',
              $snmp_session->error;
            report('other', $msg, $E_UNKNOWN);

            # Stop here: without a result there is no health value, and
            # falling through would add a bogus "more than 80% full"
            # message on top of the SNMP error.
            return;
        }
        $health = $snmp_status{$result->{$systemStateEventLogStatus}};
    }
    else {
        foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
            if (m/\A Health;(.+) \z/xms) {
                $health = $1;
                # Drops the final character of the captured text
                # (presumably the trailing newline - TODO confirm)
                chop $health;
                last;
            }
        }
    }

    # If the overall health of the ESM log is other than "Ok", the
    # fill grade of the log is more than 80% and the log should be
    # cleared
    if ($health eq 'Ok') {
        my $msg = sprintf 'ESM log health is Ok (less than 80%% full)';
        report('other', $msg, $E_OK);
    }
    elsif ($health eq 'Critical') {
        my $msg = sprintf 'ESM log is 100%% full';
        report('other', $msg, $status2nagios{$health});
    }
    else {
        my $msg = sprintf 'ESM log is more than 80%% full';
        report('other', $msg, $status2nagios{$health});
    }

    return;
}
3338
3339#-----------------------------------------
3340# CHASSIS: Check ESM log
3341#-----------------------------------------
sub check_esmlog {
    # Tally the ESM log entries by severity in $count{esm} and report
    # one summary line. In SNMP mode the numeric severity is translated
    # through %snmp_status; with omreport the 'Severity' field is used
    # as it is. Returns silently when the SNMP query yields nothing.
    if ($snmp) {
        my %esm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
            );
        my $entries = $snmp_session->get_entries(-columns => [keys %esm_oid]);

        # No entries is OK
        return if !defined $entries;

        foreach my $entry (@{ get_snmp_output($entries, \%esm_oid) }) {
            my $severity = $snmp_status{ $entry->{eventLogSeverityStatus} };
            ++$count{esm}{$severity};
        }
    }
    else {
        ++$count{esm}{ $_->{Severity} }
          for @{ run_omreport("$omopt_system esmlog") };
    }

    # Critical entries outrank non-critical ones; otherwise level 0
    my $err
      = $count{esm}{'Critical'}     > 0 ? $E_CRITICAL
      : $count{esm}{'Non-Critical'} > 0 ? $E_WARNING
      :                                   0;

    my $msg = sprintf 'ESM log content: %d critical, %d non-critical, %d ok',
      $count{esm}{'Critical'}, $count{esm}{'Non-Critical'}, $count{esm}{'Ok'};
    report('other', $msg, $err);

    return;
}
3378
3379#
3380# Handy function for checking all storage components
3381#
sub check_storage {
    # Run every storage related check, one after the other, in the
    # order listed here.
    my @storage_checks
      = (
         \&check_controllers,
         \&check_physical_disks,
         \&check_virtual_disks,
         \&check_cache_battery,
         \&check_connectors,
         \&check_enclosures,
         \&check_enclosure_fans,
         \&check_enclosure_pwr,
         \&check_enclosure_temp,
         \&check_enclosure_emms,
        );

    foreach my $storage_check (@storage_checks) {
        $storage_check->();
    }

    return;
}
3395
3396
3397
3398#---------------------------------------------------------------------
3399# Info functions
3400#---------------------------------------------------------------------
3401
3402#
3403# Fetch output from 'omreport chassis info', put in sysinfo hash
3404#
sub get_omreport_chassis_info {
    # Run 'omreport <chassis> info -fmt ssv' and copy the model and the
    # service tag into $sysinfo{model} and $sysinfo{serial}. Nothing is
    # stored (and nothing is reported) if the command cannot be started.
    open my $info_fh, '-|', "$omreport $omopt_chassis info -fmt ssv"
      or return;
    my @lines = <$info_fh>;
    close $info_fh;

    # Both the "Chassis ..." and the plain field names are accepted
    my %sysinfo_key_for
      = (
         'Chassis Model'       => 'model',
         'Model'               => 'model',
         'Chassis Service Tag' => 'serial',
         'Service Tag'         => 'serial',
        );

    LINE:
    foreach my $line (@lines) {
        next LINE
          if $line !~ m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms; # remove trailing whitespace
        $val =~ s{\s+\z}{}xms; # remove trailing whitespace

        # Lines that merely start with one of the names are ignored
        next LINE if !exists $sysinfo_key_for{$key};
        $sysinfo{ $sysinfo_key_for{$key} } = $val;
    }

    return;
}
3424
3425#
3426# Fetch output from 'omreport chassis bios', put in sysinfo hash
3427#
sub get_omreport_chassis_bios {
    # Run 'omreport <chassis> bios -fmt ssv' and copy the 'Version' and
    # 'Release Date' values into $sysinfo{bios} and $sysinfo{biosdate}.
    # Nothing is stored if the command cannot be started.
    open my $bios_fh, '-|', "$omreport $omopt_chassis bios -fmt ssv"
      or return;
    my @lines = <$bios_fh>;
    close $bios_fh;

    LINE:
    foreach my $line (@lines) {
        # Only "name;value" lines are of interest
        next LINE if $line !~ m/;/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms; # remove trailing whitespace
        $val =~ s{\s+\z}{}xms; # remove trailing whitespace

        if ($key eq 'Version') {
            $sysinfo{bios} = $val;
        }
        if ($key eq 'Release Date') {
            $sysinfo{biosdate} = $val;
        }
    }

    return;
}
3443
3444#
3445# Fetch output from 'omreport system operatingsystem', put in sysinfo hash
3446#
sub get_omreport_system_operatingsystem {
    # Run 'omreport <system> operatingsystem -fmt ssv' and copy the OS
    # name and version into $sysinfo{osname} and $sysinfo{osver}.
    # Nothing is stored if the command cannot be started.
    open my $os_fh, '-|', "$omreport $omopt_system operatingsystem -fmt ssv"
      or return;
    my @lines = <$os_fh>;
    close $os_fh;

    my %sysinfo_key_for
      = (
         'Operating System'         => 'osname',
         'Operating System Version' => 'osver',
        );

    LINE:
    foreach my $line (@lines) {
        # Only "name;value" lines are of interest
        next LINE if $line !~ m/;/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms; # remove trailing whitespace
        $val =~ s{\s+\z}{}xms; # remove trailing whitespace

        next LINE if !exists $sysinfo_key_for{$key};
        $sysinfo{ $sysinfo_key_for{$key} } = $val;
    }

    return;
}
3466
3467#
3468# Fetch output from 'omreport about', put in sysinfo hash
3469#
sub get_omreport_about {
    # Run 'omreport about -fmt ssv' and store the text after 'Version;'
    # in $sysinfo{om}. If several lines match, the last one wins.
    # Nothing is stored if the command cannot be started.
    open my $about_fh, '-|', "$omreport about -fmt ssv"
      or return;
    my @lines = <$about_fh>;
    close $about_fh;

    LINE:
    foreach my $line (@lines) {
        my ($version) = $line =~ m/\A Version;(.+) \z/xms;
        next LINE if !defined $version;

        # The capture runs to the end of the line, newline included
        chomp $version;
        $sysinfo{om} = $version;
    }

    return;
}
3483
3484#
3485# Fetch chassis info via SNMP, put in sysinfo hash
3486#
sub get_snmp_chassis_info {
    # Walk the SNMP chassis information table and copy the model name
    # and service tag into $sysinfo{model} and $sysinfo{serial}. A failed
    # walk is reported as an UNKNOWN message.
    my %chassis_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.10.1.9.1'  => 'chassisModelName',
         '1.3.6.1.4.1.674.10892.1.300.10.1.11.1' => 'chassisServiceTagName',
        );

    my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
    my $table = $snmp_session->get_table(-baseoid => $chassisInformationTable);

    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    OID:
    foreach my $oid (keys %{ $table }) {
        next OID if !exists $chassis_oid{$oid};
        my $field = $chassis_oid{$oid};

        if ($field eq 'chassisModelName') {
            # Store the model with trailing whitespace removed
            ($sysinfo{model} = $table->{$oid}) =~ s{\s+\z}{}xms;
        }
        elsif ($field eq 'chassisServiceTagName') {
            $sysinfo{serial} = $table->{$oid};
        }
    }

    return;
}
3515
3516#
3517# Fetch BIOS info via SNMP, put in sysinfo hash
3518#
sub get_snmp_chassis_bios {
    # Walk the SNMP system BIOS table and copy the BIOS version and
    # release date into $sysinfo{bios} and $sysinfo{biosdate}. A failed
    # walk is reported as an UNKNOWN message.
    my %bios_oid
      = (
         '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1' => 'systemBIOSReleaseDateName',
         '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1' => 'systemBIOSVersionName',
        );

    my $systemBIOSTable = '1.3.6.1.4.1.674.10892.1.300.50.1';
    my $table = $snmp_session->get_table(-baseoid => $systemBIOSTable);

    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    OID:
    foreach my $oid (keys %{ $table }) {
        next OID if !exists $bios_oid{$oid};
        my $field = $bios_oid{$oid};

        if ($field eq 'systemBIOSReleaseDateName') {
            # A leading 4+2+2 digit run is rearranged into "2nd/3rd/1st"
            # group order (presumably YYYYMMDD... to MM/DD/YYYY - confirm)
            ($sysinfo{biosdate} = $table->{$oid})
              =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
        }
        elsif ($field eq 'systemBIOSVersionName') {
            $sysinfo{bios} = $table->{$oid};
        }
    }

    return;
}
3547
3548#
3549# Fetch OS info via SNMP, put in sysinfo hash
3550#
sub get_snmp_system_operatingsystem {
    # Walk the SNMP operating system table and copy the OS name and
    # version into $sysinfo{osname} and $sysinfo{osver}. A failed walk
    # is reported as an UNKNOWN message.
    my %os_oid
      = (
         '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'operatingSystemOperatingSystemName',
         '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'operatingSystemOperatingSystemVersionName',
        );

    my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
    my $table = $snmp_session->get_table(-baseoid => $operatingSystemTable);

    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting OS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    OID:
    foreach my $oid (keys %{ $table }) {
        next OID if !exists $os_oid{$oid};
        my $field = $os_oid{$oid};

        if ($field eq 'operatingSystemOperatingSystemName') {
            $sysinfo{osname} = $table->{$oid};
        }
        elsif ($field eq 'operatingSystemOperatingSystemVersionName') {
            $sysinfo{osver} = $table->{$oid};
        }
    }

    return;
}
3578
3579#
3580# Fetch OMSA version via SNMP, put in sysinfo hash
3581#
sub get_snmp_about {
    # Walk the SNMP systems management software group and copy the
    # global version name into $sysinfo{om}. A failed walk is reported
    # as an UNKNOWN message.
    my %omsa_oid
      = (
         '1.3.6.1.4.1.674.10892.1.100.10.0' => 'systemManagementSoftwareGlobalVersionName',
        );
    my $systemManagementSoftwareGroup = '1.3.6.1.4.1.674.10892.1.100';
    my $table = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);

    if (!defined $table) {
        my $msg = sprintf 'SNMP ERROR getting OMSA info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    OID:
    foreach my $oid (keys %{ $table }) {
        next OID if !exists $omsa_oid{$oid};
        next OID if $omsa_oid{$oid} ne 'systemManagementSoftwareGlobalVersionName';
        $sysinfo{om} = $table->{$oid};
    }

    return;
}
3603
3604#
3605# Collects some information about the system
3606#
sub get_sysinfo
{
    # Fill %sysinfo through the SNMP or the omreport helpers, depending
    # on $snmp. Only the model/serial lookup is unconditional; the rest
    # is fetched only when an option asks for it.

    # Get system model and serial number
    if ($snmp) {
        get_snmp_chassis_info();
    }
    else {
        get_omreport_chassis_info();
    }

    # Get BIOS information. Only if needed
    my $want_bios
      = (   $opt{okinfo} >= 1
         or $opt{debug}
         or (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][bd]/xms) );
    if ($want_bios) {
        if ($snmp) {
            get_snmp_chassis_bios();
        }
        else {
            get_omreport_chassis_bios();
        }
    }

    # Return now if debug
    return if $opt{debug};

    # Get OS information. Only if needed
    if (defined $opt{postmsg} and $opt{postmsg} =~ m/[%][or]/xms) {
        if ($snmp) {
            get_snmp_system_operatingsystem();
        }
        else {
            get_omreport_system_operatingsystem();
        }
    }

    # Get OMSA information. Only if needed
    if ($opt{okinfo} >= 3) {
        if ($snmp) {
            get_snmp_about();
        }
        else {
            get_omreport_about();
        }
    }

    return;
}
3634
3635
3636# Helper function for running omreport when the results are strictly
3637# name=value pairs.
sub run_omreport_info {
    # Run "omreport <command> -fmt ssv" (stderr merged into stdout) and
    # return a reference to a hash that maps the first field of every
    # semicolon separated line to its second field.
    my ($command) = @_;
    my %value_of = ();

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");

    # Parse output, store in hash
    LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        # An error line is reported, but parsing carries on
        if ($line =~ m/\A Error/xms) {
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        next LINE if $line !~ m/;/xms;  # ignore lines with less than two fields

        # Split without a limit, so that an empty trailing value is
        # dropped and the stored value is undefined for it
        my @fields = split m/;/xms, $line;
        $value_of{ $fields[0] } = $fields[1];
    }

    # Finally, return the collected information
    return \%value_of;
}
3660
3661# Get various firmware information (BMC, RAC)
sub get_firmware_info {
    # Collect BMC and RAC firmware information in %sysinfo (keys bmc,
    # bmc_fw, rac, rac_name, rac_fw). Data comes from the SNMP firmware
    # table when $snmp is set, otherwise from 'omreport <chassis> info'.
    # Returns silently when the SNMP table is not available.
    my @snmp_output = ();
    my %nrpe_output = ();

    if ($snmp) {
        my %fw_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.60.1.7.1'  => 'firmwareType',
             '1.3.6.1.4.1.674.10892.1.300.60.1.8.1'  => 'firmwareTypeName',
             '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
            );

        my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
        my $result = $snmp_session->get_table(-baseoid => $firmwareTable);

        # Some don't have this OID, this is ok
        if (!defined $result) {
            return;
        }

        @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
    }
    else {
        %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
    }

    my %fw_type  # Firmware types
      = (
         1  => 'other',                              # other than following values
         2  => 'unknown',                            # unknown
         3  => 'systemBIOS',                         # System BIOS
         4  => 'embeddedSystemManagementController', # Embedded System Management Controller
         5  => 'powerSupplyParallelingBoard',        # Power Supply Paralleling Board
         6  => 'systemBackPlane',                    # System (Primary) Backplane
         7  => 'powerVault2XXSKernel',               # PowerVault 2XXS Kernel
         8  => 'powerVault2XXSApplication',          # PowerVault 2XXS Application
         9  => 'frontPanel',                         # Front Panel Controller
         10 => 'baseboardManagementController',      # Baseboard Management Controller
         11 => 'hotPlugPCI',                         # Hot Plug PCI Controller
         12 => 'sensorData',                         # Sensor Data Records
         13 => 'peripheralBay',                      # Peripheral Bay Backplane
         14 => 'secondaryBackPlane',                 # Secondary Backplane for ESM 2 systems
         15 => 'secondaryBackPlaneESM3And4',         # Secondary Backplane for ESM 3 and 4 systems
         16 => 'rac',                                # Remote Access Controller
         17 => 'imc',                                # Integrated Management Controller
        );

    if ($snmp) {
        FIRMWARE:
        foreach my $out (@snmp_output) {
            # Rows with a missing or unmapped firmware type are skipped
            # instead of being compared while undefined
            my $type = defined $out->{firmwareType}
              ? $fw_type{ $out->{firmwareType} } : undef;
            next FIRMWARE if !defined $type;

            if ($type eq 'baseboardManagementController') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
            }
            # The alternation is grouped so that both anchors apply to
            # both names; ungrouped it reads (\A rac) | (imc \z)
            elsif ($type =~ m{\A (?:rac|imc) \z}xms) {
                my $name = $out->{firmwareTypeName};
                $name =~ s/\s//gxms;   # name is stored without whitespace
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_name'} = $name;
                $sysinfo{'rac_fw'}   = $out->{firmwareVersionName};
            }
        }
    }
    else {
        KEY:
        foreach my $key (keys %nrpe_output) {
            next KEY if !defined $nrpe_output{$key};
            if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $nrpe_output{$key};
            }
            elsif ($key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
                # e.g. a key of 'DRAC 5 Version' gives the name 'DRAC5'
                my $name = "$1$2";
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_fw'}   = $nrpe_output{$key};
                $sysinfo{'rac_name'} = $name;
            }
        }
    }

    return;
}
3742
3743
3744
3745#=====================================================================
3746# Main program
3747#=====================================================================
3748
# Here we do the actual checking of components

# Check global status if applicable
$globalstatus = check_global() if $global;

# Run each selected check; the order of the calls is fixed
check_storage()       if $check{storage};
check_memory()        if $check{memory};
check_fans()          if $check{fans};
check_powersupplies() if $check{power};
check_temperatures()  if $check{temp};
check_processors()    if $check{cpu};
check_volts()         if $check{voltage};
check_batteries()     if $check{batteries};
check_pwrmonitoring() if $check{amperage};
check_intrusion()     if $check{intrusion};
check_alertlog()      if $check{alertlog};
check_esmlog()        if $check{esmlog};
check_esmlog_health() if $check{esmhealth};
3769
3770
3771#---------------------------------------------------------------------
3772# Finish up
3773#---------------------------------------------------------------------
3774
# Counter variable: number of messages seen per nagios level
%nagios_alert_count = map { $_ => 0 } qw(OK WARNING CRITICAL UNKNOWN);

# Get system information
get_sysinfo();

# Get firmware info if requested via option
get_firmware_info() if $opt{okinfo} >= 1;

# Close SNMP session
$snmp_session->close if $snmp;
3796
# Print messages. In debug mode every collected message is listed in a
# table per category; otherwise only the non-OK messages are printed,
# highest severity level first. Both modes count the printed messages
# per nagios level in %nagios_alert_count.
if ($opt{debug}) {
    print " System: $sysinfo{model}\n";
    print " ServiceTag: $sysinfo{serial}\n";
    print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}\n";
    if (@report_storage) {
        print "-----------------------------------------------------------------------------\n";
        print " Storage Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+----------+--------------------------------------------------------\n";
        foreach (@report_storage) {
            my ($msg, $level, $nexus) = @{$_};
            # State and id are right-aligned in 8 columns each
            printf "%8s | %8s | %s\n", $reverse_exitcode{$level}, $nexus, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if (@report_chassis) {
        print "-----------------------------------------------------------------------------\n";
        print " Chassis Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+------+------------------------------------------------------------\n";
        foreach (@report_chassis) {
            my ($msg, $level, $nexus) = @{$_};
            # State is right-aligned in 8 columns, id in 4
            printf "%8s | %4s | %s\n", $reverse_exitcode{$level}, $nexus, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if (@report_other) {
        print "-----------------------------------------------------------------------------\n";
        print " Other messages \n";
        print "=============================================================================\n";
        print " STATE | MESSAGE TEXT \n";
        print "---------+-------------------------------------------------------------------\n";
        foreach (@report_other) {
            my ($msg, $level) = @{$_};
            printf "%8s | %s\n", $reverse_exitcode{$level}, $msg;
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
}
else {
    my $c = 0;  # counter to determine linebreaks

    # Run through each message, sorted by severity level. The comparator
    # must return -1/0/1, so the numeric level is compared with <=>
    # (operands swapped for descending order) rather than with '<'.
    ALERT:
    foreach (sort { $b->[1] <=> $a->[1] } (@report_storage, @report_chassis, @report_other)) {
        my ($msg, $level, $nexus) = @{ $_ };
        next ALERT if $level == $E_OK;

        if (defined $opt{only}) {
            # If user wants only critical alerts
            next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);

            # If user wants only warning alerts
            next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
        }

        # Prefix with service tag if specified with option '-i|--info'
        if ($opt{info}) {
            if (defined $opt{htmlinfo}) {
                $msg = '[<a href="' . warranty_url($sysinfo{serial})
                  . "\">$sysinfo{serial}</a>] " . $msg;
            }
            else {
                $msg = "[$sysinfo{serial}] " . $msg;
            }
        }

        # Prefix with nagios level if specified with option '--state'
        $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

        # Prefix with one-letter nagios level if specified with option '--short-state'
        $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

        # Every message except the first is preceded by the linebreak
        if ($c++ == 0) {
            print $msg;
        }
        else {
            print $linebreak, $msg;
        }

        $nagios_alert_count{$reverse_exitcode{$level}}++;
    }
}
3880
# Determine our exit code. Precedence when several levels were seen:
# CRITICAL, then WARNING, then UNKNOWN, else OK.
$exit_code
  = $nagios_alert_count{'CRITICAL'} > 0 ? $E_CRITICAL
  : $nagios_alert_count{'WARNING'}  > 0 ? $E_WARNING
  : $nagios_alert_count{'UNKNOWN'}  > 0 ? $E_UNKNOWN
  :                                       $E_OK;

# Global status via SNMP.. extra safety check: the global status is not
# OK although no component check raised anything (and no '--only'
# restriction is active)
if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
    my @oops
      = (
         "OOPS! Something is wrong with this server, but I don't know what. ",
         "The global system health status is $reverse_exitcode{$globalstatus}, ",
         "but every component check is OK. This may be a bug in the Nagios plugin, ",
         "please file a bug report.\n",
        );
    foreach my $part (@oops) {
        print $part;
    }
    exit $E_UNKNOWN;
}
3895
# Print the trailing part of the plugin output. Three cases:
#
#   1. Everything is OK and a single component check was requested via
#      $opt{only}: print a short OK message for that component.
#   2. Everything is OK (no $opt{only}, or one of the excluded values):
#      print the general OK line, plus extra detail depending on
#      $opt{okinfo}.
#   3. Otherwise (alerts were printed, or debug mode): optionally
#      append system info ($opt{extinfo}) and a post message
#      ($opt{postmsg}).
#
# The alternatives in the $opt{only} pattern are grouped so that \A and
# \z anchor every alternative, not only the first and the last one.
if (   $exit_code == $E_OK
    && defined $opt{only}
    && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms
    && !$opt{debug} ) {

    # OK message for each component that can be selected via $opt{only}
    my %okmsg
      = ( 'storage'   => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
          'fans'      => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked",
          'temp'      => "TEMPERATURES OK - $count{temp} temperature probes checked",
          'memory'    => "MEMORY OK - $count{dimm} memory modules checked",
          'power'     => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked",
          'cpu'       => "PROCESSORS OK - $count{cpu} processors checked",
          'voltage'   => "VOLTAGE OK - $count{volt} voltage probes checked",
          'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked",
          'amperage'  => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
          'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked",
          'alertlog'  => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
          'esmlog'    => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
          'esmhealth' => "ESM LOG OK - less than 80% used",
        );

    print $okmsg{$opt{only}};
}
elsif ($exit_code == $E_OK && !$opt{debug}) {

    # General OK line; with $opt{htmlinfo} the model and serial number
    # are wrapped in links built by documentation_url() and
    # warranty_url().
    if (defined $opt{htmlinfo}) {
        printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
          documentation_url($sysinfo{model}), $sysinfo{model},
          warranty_url($sysinfo{serial}), $sysinfo{serial};
    }
    else {
        printf q{OK - System: '%s', SN: '%s', hardware working fine},
          $sysinfo{model}, $sysinfo{serial};
    }

    if ($check{storage}) {
        printf ', %d logical drives, %d physical drives',
          $count{vdisk}, $count{pdisk};
    }
    else {
        print ', not checking storage';
    }

    # okinfo level 1 and up: BIOS, plus RAC and BMC firmware if present
    if ($opt{okinfo} >= 1) {
        print $linebreak;
        printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};

        if ($sysinfo{rac}) {
            printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
        }
        if ($sysinfo{bmc}) {
            printf q{, BMC='%s'}, $sysinfo{bmc_fw};
        }
    }

    # okinfo level 2 and up: one line per storage controller and
    # enclosure, only when storage is checked
    if ($opt{okinfo} >= 2) {
        if ($check{storage}) {
            my @storageprint = ();
            foreach my $id (sort keys %{ $sysinfo{controller} }) {
                chomp $sysinfo{controller}{$id}{driver};
                push @storageprint, sprintf q{----- CTRL %s (%s): FW='%s', DR='%s'},
                  $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
                  $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
            }
            foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
                push @storageprint, sprintf q{----- ENCL %s (%s): FW='%s'},
                  $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
                  $sysinfo{enclosure}{$id}->{firmware};
            }

            # print stuff
            foreach my $line (@storageprint) {
                print $linebreak, $line;
            }
        }
    }

    # okinfo level 3 and up: OMSA version
    if ($opt{okinfo} >= 3) {
        print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
    }

}
else {
    # Extended info: system model and serial number on a line of its own
    if ($opt{extinfo}) {
        print $linebreak;
        if (defined $opt{htmlinfo}) {
            printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
              documentation_url($sysinfo{model}), $sysinfo{model},
              warranty_url($sysinfo{serial}), $sysinfo{serial};
        }
        else {
            printf '------ SYSTEM: %s, SN: %s',
              $sysinfo{model}, $sysinfo{serial};
        }
    }

    # Post message: $opt{postmsg} is either the name of a file, whose
    # first line is used, or the message text itself.
    if (defined $opt{postmsg}) {
        my $post = undef;
        if (-f $opt{postmsg}) {
            open my $POST, '<', $opt{postmsg}
              or do {
                  print $linebreak;
                  print "ERROR: Couldn't open post message file $opt{postmsg}: $!\n";
                  exit $E_UNKNOWN;
              };
            $post = <$POST>;
            close $POST;

            # An empty file yields undef; only chomp a line that was read
            chomp $post if defined $post;
        }
        else {
            $post = $opt{postmsg};
        }
        if (defined $post) {
            print $linebreak;

            # Placeholder expansion. Everything is replaced in a single
            # left-to-right pass, so that '%%' reliably produces a
            # literal percent sign (e.g. '%%s' gives '%s') and text
            # inserted for one placeholder is never scanned again for
            # further placeholders.
            my %expansion
              = ( 's' => $sysinfo{serial},
                  'm' => $sysinfo{model},
                  'b' => $sysinfo{bios},
                  'd' => $sysinfo{biosdate},
                  'o' => $sysinfo{osname},
                  'r' => $sysinfo{osver},
                  'p' => $count{pdisk},
                  'l' => $count{vdisk},
                  'n' => $linebreak,
                  '%' => '%',
                );
            $post =~ s{[%]([smbdorpln%])}{$expansion{$1}}gxms;
            print $post;
        }
    }
}
4017
# Print performance data, if requested and if there is any. The
# entries are separated by a newline when $opt{perfdata} is
# 'multiline', otherwise by a single space.
if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
    my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ };  # line break for perfdata
    print q{|};

    # Sort comparator for perfdata labels. Labels are ordered by the
    # rank of their type, i.e. the part before the first underscore,
    # and alphabetically within the same type. Labels with a type that
    # is not listed get rank -1 and therefore sort before all others.
    # The ranks are compared numerically.
    #
    # The table is kept inside the sub so that the comparator does not
    # close over a lexical variable of the surrounding block.
    sub perfdata {
        my %order
          = (
             fan       => 0,
             pwr       => 1,
             temp      => 2,
             enclosure => 3,
            );

        my @rank = ();
        foreach my $label ($a, $b) {
            my $type = (split /_/, $label, 2)[0];
            push @rank, (defined $type && exists $order{$type}) ? $order{$type} : -1;
        }

        return ($rank[0] <=> $rank[1]) || ($a cmp $b);
    }

    print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
}
e133d101 4036
# Terminate the output with a newline, except in debug mode
print "\n" unless $opt{debug};

# Leave with the exit code determined above
exit $exit_code;