]>
Commit | Line | Data |
---|---|---|
669797e1 | 1 | #!/usr/bin/perl |
2 | # | |
3 | # Nagios plugin | |
4 | # | |
5 | # Monitor Dell server hardware status using Dell OpenManage Server | |
6 | # Administrator, either locally via NRPE, or remotely via SNMP. | |
7 | # | |
8 | # $Id$ | |
9 | # | |
f1728beb | 10 | # Copyright (C) 2010 Trond H. Amundsen |
669797e1 | 11 | # |
12 | # This program is free software: you can redistribute it and/or modify | |
13 | # it under the terms of the GNU General Public License as published by | |
14 | # the Free Software Foundation, either version 3 of the License, or | |
15 | # (at your option) any later version. | |
16 | # | |
17 | # This program is distributed in the hope that it will be useful, but | |
18 | # WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 | # General Public License for more details. | |
21 | # | |
22 | # You should have received a copy of the GNU General Public License | |
23 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
24 | # | |
25 | ||
26 | require 5.006; # Perl v5.6.0 or newer is required | |
27 | use strict; | |
28 | use warnings; | |
29 | use POSIX qw(isatty ceil); | |
30 | use Getopt::Long qw(:config no_ignore_case); | |
31 | ||
32 | # Global (package) variables used throughout the code | |
33 | use vars qw( $NAME $VERSION $AUTHOR $CONTACT $E_OK $E_WARNING $E_CRITICAL | |
34 | $E_UNKNOWN $FW_LOCK $USAGE $HELP $LICENSE | |
35 | $snmp_session $snmp_error $omreport $globalstatus $global | |
36 | $linebreak $omopt_chassis $omopt_system $blade | |
cbbc270f | 37 | $exit_code $snmp $original_sigwarn |
669797e1 | 38 | %check %opt %perfdata %reverse_exitcode %status2nagios |
39 | %snmp_status %snmp_probestatus %probestatus2nagios %sysinfo | |
40 | %blacklist %nagios_alert_count %count | |
cbbc270f | 41 | @perl_warnings @controllers @enclosures |
669797e1 | 42 | @report_storage @report_chassis @report_other |
43 | ); | |
44 | ||
45 | #--------------------------------------------------------------------- | |
46 | # Initialization and global variables | |
47 | #--------------------------------------------------------------------- | |
48 | ||
# Handler for the __WARN__ pseudo-signal: instead of letting perl
# print a warning, keep its arguments (as one array ref per warning)
# in @perl_warnings
sub collect_perl_warning {
    my @warning = @_;
    push @perl_warnings, \@warning;
}
53 | ||
# Remember the current __WARN__ handler, then route all perl warnings
# through collect_perl_warning()
$original_sigwarn = $SIG{__WARN__};
$SIG{__WARN__}    = \&collect_perl_warning;

# Plugin name, version, author and contact address
($NAME, $VERSION, $AUTHOR, $CONTACT)
  = ('check_openmanage', '3.5.4', 'Trond H. Amundsen', 't.h.amundsen@usit.uio.no');

# Nagios exit codes
($E_OK, $E_WARNING, $E_CRITICAL, $E_UNKNOWN) = (0, 1, 2, 3);

# Firmware update lock file [FIXME: location on Windows?]
$FW_LOCK = '/var/lock/.spsetup';  # default on Linux
72 | ||
# Usage text (printed on option errors and before the help text)
$USAGE = <<"END_USAGE";
Usage: $NAME [OPTION]...
END_USAGE

# Help text. Not interpolated. The long option names listed here must
# match the names declared in the GetOptions() call; the verbosity
# option is declared as "ok-info", so that is the name shown.
$HELP = <<'END_HELP';

GENERAL OPTIONS:

-p, --perfdata Output performance data
-t, --timeout Plugin timeout in seconds
-c, --critical Customise temperature critical limits
-w, --warning Customise temperature warning limits
-d, --debug Debug output, reports everything
-h, --help Display this help text
-V, --version Display version info

SNMP OPTIONS:

-H, --hostname Hostname or IP of the server (needed for SNMP)
-C, --community SNMP community string
-P, --protocol SNMP protocol version
--port SNMP port number

OUTPUT OPTIONS:

-i, --info Prefix any alerts with the service tag
-e, --extinfo Append system info to alerts
-s, --state Prefix alerts with alert state
--short-state Prefix alerts with alert state (abbreviated)
-o, --ok-info Verbosity when check result is OK
--htmlinfo HTML output with clickable links

CHECK CONTROL AND BLACKLISTING:

-a, --all Check everything, even log content
-b, --blacklist Blacklist missing and/or failed components
--only Only check a certain component or alert type
--check Fine-tune which components are checked

For more information and advanced options, see the manual page or URL:
http://folk.uio.no/trondham/software/check_openmanage.html
END_HELP

# Version and license text (interpolates name, version and author)
$LICENSE = <<"END_LICENSE";
$NAME $VERSION
Copyright (C) 2010 $AUTHOR
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.

Written by $AUTHOR <$CONTACT>
END_LICENSE
128 | ||
# Options with default values. Every key that GetOptions() writes to
# below has an entry here, so no option is left uninitialized.
%opt = (
    'blacklist'    => [],
    'check'        => [],
    'critical'     => [],
    'warning'      => [],
    'timeout'      => 30,        # default timeout is 30 seconds
    'debug'        => 0,
    'help'         => 0,
    'perfdata'     => undef,
    'info'         => 0,
    'extinfo'      => 0,
    'htmlinfo'     => undef,
    'postmsg'      => undef,
    'state'        => 0,
    'shortstate'   => 0,         # destination of the --short-state option
    'okinfo'       => 0,         # default "ok" output level
    'linebreak'    => undef,
    'version'      => 0,
    'all'          => 0,
    'only'         => undef,
    'omreport'     => undef,
    'port'         => 161,       # default SNMP port
    'hostname'     => undef,
    'community'    => 'public',  # SNMP v1 or v2c
    'protocol'     => 2,
    'username'     => undef,     # SNMP v3
    'authpassword' => undef,     # SNMP v3
    'authkey'      => undef,     # SNMP v3
    'authprotocol' => undef,     # SNMP v3
    'privpassword' => undef,     # SNMP v3
    'privkey'      => undef,     # SNMP v3
    'privprotocol' => undef,     # SNMP v3
);

# Get options. On a parsing error the usage text is printed and the
# plugin exits with status UNKNOWN. The verbosity option answers to
# both "--ok-info" and "--okinfo".
GetOptions(
    'b|blacklist=s'       => $opt{blacklist},
    'check=s'             => $opt{check},
    'c|critical=s'        => $opt{critical},
    'w|warning=s'         => $opt{warning},
    't|timeout=i'         => \$opt{timeout},
    'd|debug'             => \$opt{debug},
    'h|help'              => \$opt{help},
    'V|version'           => \$opt{version},
    'p|perfdata:s'        => \$opt{perfdata},
    'i|info'              => \$opt{info},
    'e|extinfo'           => \$opt{extinfo},
    'htmlinfo:s'          => \$opt{htmlinfo},
    'postmsg=s'           => \$opt{postmsg},
    's|state'             => \$opt{state},
    'short-state'         => \$opt{shortstate},
    'o|ok-info|okinfo=i'  => \$opt{okinfo},
    'l|linebreak=s'       => \$opt{linebreak},
    'a|all'               => \$opt{all},
    'only=s'              => \$opt{only},
    'omreport=s'          => \$opt{omreport},
    'port=i'              => \$opt{port},
    'H|hostname=s'        => \$opt{hostname},
    'C|community=s'       => \$opt{community},
    'P|protocol=i'        => \$opt{protocol},
    'U|username=s'        => \$opt{username},
    'authpassword=s'      => \$opt{authpassword},
    'authkey=s'           => \$opt{authkey},
    'authprotocol=s'      => \$opt{authprotocol},
    'privpassword=s'      => \$opt{privpassword},
    'privkey=s'           => \$opt{privkey},
    'privprotocol=s'      => \$opt{privprotocol},
) or do { print $USAGE; exit $E_UNKNOWN };
196 | ||
# Help or version requested: print the text and exit OK. Help wins
# if both were given.
if ($opt{help} or $opt{version}) {
    print $opt{help} ? ($USAGE, $HELP) : $LICENSE;
    exit $E_OK;
}

# Arm the plugin timeout: when the alarm fires, report it and exit
# with status UNKNOWN
$SIG{ALRM} = sub {
    print "PLUGIN TIMEOUT: $NAME timed out after $opt{timeout} seconds\n";
    exit $E_UNKNOWN;
};
alarm $opt{timeout};
215 | ||
# SNMP mode is selected by giving a hostname
$snmp = defined $opt{hostname} ? 1 : 0;

# SNMP session variables and the omreport command, all unset for now
($snmp_session, $snmp_error, $omreport) = (undef, undef, undef);

# Check flags, override available with the --check option.
# Enabled by default: storage subsystem, memory (dimms), fans, power
# supplies, temperature, processors, voltage, battery probes, power
# consumption, intrusion detection and the ESM log overall health.
# Disabled by default: the alert log and the ESM (hardware) log content.
%check = (
    (map { ($_ => 1) } qw(storage memory fans power temp cpu voltage
                          batteries amperage intrusion esmhealth)),
    'alertlog' => 0,
    'esmlog'   => 0,
);

# Default line break: newline on a terminal, HTML break otherwise
$linebreak = isatty(*STDOUT) ? "\n" : '<br/>';

# Line break from option: 'REG' and 'HTML' are shorthands, anything
# else is used verbatim
if (defined $opt{linebreak}) {
    $linebreak = $opt{linebreak} eq 'REG'  ? "\n"
               : $opt{linebreak} eq 'HTML' ? '<br/>'
               :                             $opt{linebreak};
}

# A firmware upgrade in progress (lock file present) disables local
# monitoring; exit with status=UNKNOWN
if (-f $FW_LOCK and not $snmp) {
    print "MONITORING DISABLED - Firmware update in progress ($FW_LOCK exists)\n";
    exit $E_UNKNOWN;
}
263 | ||
# Controllers and enclosures found so far
@controllers = ();
@enclosures  = ();

# Collected messages with associated nagios level, one list each for
# storage, chassis and everything else
@report_storage = ();
@report_chassis = ();
@report_other   = ();

# Counters for everything. Scalar counters: physical disks (pdisk),
# logical drives (vdisk), temperature/voltage/amperage/intrusion probes
# (temp, volt, amp, intr), memory modules (dimm), fan probes (fan),
# CPUs (cpu), batteries (bat) and power supplies (power). The ESM log
# (esm) and alert log (alert) are counted per severity.
%count = (
    (map { ($_ => 0) } qw(pdisk vdisk temp volt amp intr dimm fan cpu bat power)),
    (map { ($_ => { 'Critical' => 0, 'Non-Critical' => 0, 'Ok' => 0 }) } qw(esm alert)),
);

# Performance data
%perfdata = ();

# Global health status
$global       = 1;      # default is to check global status
$globalstatus = $E_OK;  # default global health status is "OK"

# Nagios exit code => state name
%reverse_exitcode = ();
@reverse_exitcode{ $E_OK, $E_WARNING, $E_CRITICAL, $E_UNKNOWN }
  = qw(OK WARNING CRITICAL UNKNOWN);

# OpenManage (omreport) and SNMP status name => Nagios level
%status2nagios = (
    'Ok'           => $E_OK,
    'Non-Critical' => $E_WARNING,
    (map { ($_ => $E_CRITICAL) } qw(Unknown Critical Non-Recoverable Other)),
);

# Status via SNMP: numeric code (1..6) => status name
{
    my @status_names = qw(Other Unknown Ok Non-Critical Critical Non-Recoverable);
    %snmp_status = map { ($_ + 1 => $status_names[$_]) } 0 .. $#status_names;
}

# Probe status via SNMP: numeric code => probe status name
%snmp_probestatus = (
    1  => 'Other',                # none of the statuses below
    2  => 'Unknown',              # unknown (not known or monitored)
    3  => 'Ok',                   # value is within the thresholds
    4  => 'nonCriticalUpper',     # crossed upper noncritical threshold
    5  => 'criticalUpper',        # crossed upper critical threshold
    6  => 'nonRecoverableUpper',  # crossed upper non-recoverable threshold
    7  => 'nonCriticalLower',     # crossed lower noncritical threshold
    8  => 'criticalLower',        # crossed lower critical threshold
    9  => 'nonRecoverableLower',  # crossed lower non-recoverable threshold
    10 => 'failed',               # probe is not functional
);

# Probe status name => Nagios level
%probestatus2nagios = (
    'Ok' => $E_OK,
    (map { ($_ => $E_WARNING) } qw(nonCriticalUpper nonCriticalLower)),
    (map { ($_ => $E_CRITICAL) } qw(Other Unknown criticalUpper nonRecoverableUpper
                                    criticalLower nonRecoverableLower failed)),
);

# System information gathered. Text fields start out as 'N/A': BIOS
# version and release date, serial number (service tag), system model,
# OS name and version, OMSA version, RAC name, BMC and RAC firmware.
# The 'bmc' and 'rac' flags tell whether the system HAS a baseboard
# management controller / remote access controller.
%sysinfo = (
    (map { ($_ => 'N/A') } qw(bios biosdate serial model osname osver om
                              rac_name bmc_fw rac_fw)),
    'bmc' => 0,
    'rac' => 0,
);
383 | ||
# Adjust which checks to perform
if (defined $opt{check}) {
    adjust_checks();
}

# Blacklisted components
%blacklist = ();
if (defined $opt{blacklist}) {
    %blacklist = %{ get_blacklist() };
}

# If blacklisting is in effect, don't check global health status
$global = 0 if %blacklist;

# Take into account new hardware and blades: default "chassis" and
# "system" options to omreport, and assume this is not a blade system
($omopt_chassis, $omopt_system, $blade) = ('chassis', 'system', 0);

# Some initializations and checking before we begin
if (!$snmp) {
    # Local mode: find the omreport binary, then check its help output
    # to see which options are available (this also detects blades)
    find_omreport();
    check_omreport_options();
}
else {
    snmp_initialize();    # initialize SNMP
    snmp_check();         # check that SNMP works
    snmp_detect_blade();  # detect blade via SNMP
}
413 | ||
414 | ||
415 | #--------------------------------------------------------------------- | |
416 | # Helper functions | |
417 | #--------------------------------------------------------------------- | |
418 | ||
#
# Queue a message for later output. Arguments: message type
# ('storage', 'chassis' or 'other'), message text, nagios level and
# an optional component id (defaults to the empty string). Returns
# the result of the push.
#
sub report {
    my ($type, $msg, $exval, $id) = @_;
    $id = q{} if !defined $id;

    # Which message array each type ends up in
    my %queue_for = (
        'storage' => \@report_storage,
        'chassis' => \@report_chassis,
        'other'   => \@report_other,
    );

    my $entry = [ $msg, $exval, $id ];
    return push @{ $queue_for{$type} }, $entry;
}
435 | ||
436 | ||
#
# Run a command through a pipe and return a reference to an array
# holding its output lines. A failure to start the command is
# reported and yields an empty list; a failing close() is reported
# but the lines read so far are still returned.
#
sub run_command {
    my $command = shift;

    my $CMD;
    if (!open $CMD, '-|', $command) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return [];
    }

    my @lines = <$CMD>;

    if (!close $CMD) {
        report('other', "Couldn't close filehandle for command '$command': $!", $E_UNKNOWN);
    }

    return \@lines;
}
453 | ||
#
# Run a command through a pipe and return all of its output as one
# string. A failure to start the command is reported and gives an
# empty return.
#
sub slurp_command {
    my $command = shift;

    my $CMD;
    if (!open $CMD, '-|', $command) {
        report('other', "Couldn't run command '$command': $!", $E_UNKNOWN);
        return;
    }

    # No input record separator: a single read gets everything
    local $/ = undef;
    my $rawtext = <$CMD>;

    # NOTE: The return value of close() is deliberately not checked
    # since omreport does something weird sometimes.
    close $CMD;

    return $rawtext;
}
470 | ||
#
# Initialize SNMP
#
# Builds the parameters for Net::SNMP->session() from %opt and opens
# the session, storing the result in $snmp_session / $snmp_error.
# Prints a message and exits with status UNKNOWN if an option is
# invalid, if Net::SNMP can't be loaded or if no session is created.
#
sub snmp_initialize {
    # Legal SNMP v3 protocols. The alternatives are grouped so that
    # the \A and \z anchors apply to every one of them; without the
    # group any string merely containing e.g. "aes" would be accepted.
    # "3desede" is accepted as well because the former, unanchored
    # pattern let it through (presumably "3desde" is a misspelling of
    # it -- verify against Net::SNMP).
    my $snmp_v3_privprotocol = qr{\A (?:des|aes|aes128|3des|3desde|3desede) \z}xms;
    my $snmp_v3_authprotocol = qr{\A (?:md5|sha) \z}xms;

    # Parameters to Net::SNMP->session()
    my %param
      = (
         '-port'     => $opt{port},
         '-hostname' => $opt{hostname},
         '-version'  => $opt{protocol},
        );

    # Parameters for SNMP v3
    if ($opt{protocol} == 3) {

        # Username is mandatory
        if (!defined $opt{username}) {
            print "SNMP ERROR: With SNMPv3 the username must be specified\n";
            exit $E_UNKNOWN;
        }
        $param{'-username'} = $opt{username};

        # Passwords and keys are optional and passed on unchanged
        foreach my $name (qw(authpassword authkey privpassword privkey)) {
            $param{"-$name"} = $opt{$name} if defined $opt{$name};
        }

        # Privprotocol is optional, but must be a known one if given
        if (defined $opt{privprotocol}) {
            if ($opt{privprotocol} !~ m/$snmp_v3_privprotocol/xms) {
                print "SNMP ERROR: Unknown privprotocol '$opt{privprotocol}', "
                  . "must be one of [des|aes|aes128|3des|3desde]\n";
                exit $E_UNKNOWN;
            }
            $param{'-privprotocol'} = $opt{privprotocol};
        }

        # Authprotocol is optional, but must be a known one if given
        if (defined $opt{authprotocol}) {
            if ($opt{authprotocol} !~ m/$snmp_v3_authprotocol/xms) {
                print "SNMP ERROR: Unknown authprotocol '$opt{authprotocol}', "
                  . "must be one of [md5|sha]\n";
                exit $E_UNKNOWN;
            }
            $param{'-authprotocol'} = $opt{authprotocol};
        }
    }
    # Parameters for SNMP v2c or v1
    elsif ($opt{protocol} == 2 or $opt{protocol} == 1) {
        $param{'-community'} = $opt{community};
    }
    else {
        print "SNMP ERROR: Unknown SNMP version '$opt{protocol}'\n";
        exit $E_UNKNOWN;
    }

    # Net::SNMP is loaded at run time so it is only needed in SNMP mode
    if ( !eval { require Net::SNMP; 1 } ) {
        print "You need perl module Net::SNMP to run $NAME in SNMP mode\n";
        exit $E_UNKNOWN;
    }

    # Try to initialize the SNMP session
    ($snmp_session, $snmp_error) = Net::SNMP->session( %param );
    if (!defined $snmp_session) {
        printf "SNMP: %s\n", $snmp_error;
        exit $E_UNKNOWN;
    }
    return;
}
566 | ||
#
# Verify that SNMP works by requesting "chassisModelName", which all
# servers should have. Exits with status CRITICAL if the request
# fails, or with status UNKNOWN if the agent doesn't know the OID.
#
sub snmp_check {
    my $chassisModelName = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';
    my $response = $snmp_session->get_request(-varbindlist => [$chassisModelName]);

    # No response at all, typically if remote host isn't responding
    if (!defined $response) {
        printf "SNMP CRITICAL: %s\n", $snmp_session->error;
        exit $E_CRITICAL;
    }

    # The OID is unknown if OpenManage isn't installed or is not working
    my $model = $response->{$chassisModelName};
    if ($model =~ m{\A noSuch (Instance|Object) \z}xms) {
        print "ERROR: (SNMP) OpenManage is not installed or is not working correctly\n";
        exit $E_UNKNOWN;
    }
    return;
}
588 | ||
#
# Detecting blade via SNMP. Sets $blade to 1 if the baseboard type
# reported by the agent is '3'; otherwise $blade is left untouched.
#
sub snmp_detect_blade {
    my $DellBaseBoardType = '1.3.6.1.4.1.674.10892.1.300.80.1.7.1.1';
    my $result = $snmp_session->get_request(-varbindlist => [$DellBaseBoardType]);

    # Identify blade. Older models (4th and 5th gen models) and/or old
    # OMSA (4.x) don't have this OID. If we get "noSuchInstance" or
    # similar, we assume that this isn't a blade. The same goes for a
    # request that fails altogether, or an answer without a value for
    # the OID: guard against both so that we never compare undef.
    if (defined $result
        and defined $result->{$DellBaseBoardType}
        and $result->{$DellBaseBoardType} eq '3') {
        $blade = 1;
    }
    return;
}
604 | ||
#
# Locate the omreport binary and store its path, wrapped in double
# quotes, in $omreport. An executable given with the --omreport
# option is preferred; otherwise the first executable among the known
# install locations is used. Exits with status UNKNOWN if $omreport
# is still unset afterwards.
#
sub find_omreport {
    # Path specified by the user, used only if it is executable
    my $user_path = $opt{omreport};
    if (defined $user_path and -x $user_path) {
        $omreport = qq{"$user_path"};
        return;
    }

    # Possible full paths for omreport, in order of preference
    my @candidates = (
        '/usr/bin/omreport',                        # default on Linux
        '/opt/dell/srvadmin/bin/omreport',          # default on Linux with OMSA 6.2.0
        '/opt/dell/srvadmin/oma/bin/omreport.sh',   # alternate on Linux
        '/opt/dell/srvadmin/oma/bin/omreport',      # alternate on Linux
        'C:\Program Files (x86)\Dell\SysMgt\oma\bin\omreport.exe',  # default on Windows x64
        'C:\Program Files\Dell\SysMgt\oma\bin\omreport.exe',        # default on Windows x32
        'c:\progra~1\dell\sysmgt\oma\bin\omreport.exe',  # 8bit legacy default on Windows x32
        'c:\progra~2\dell\sysmgt\oma\bin\omreport.exe',  # 8bit legacy default on Windows x64
    );

    # Use the first executable one
    my ($found) = grep { -x $_ } @candidates;
    if (defined $found) {
        $omreport = qq{"$found"};
    }

    # Exit with status=UNKNOWN if OM is not installed, or we don't
    # have permission to execute the binary
    if (!defined $omreport) {
        print "ERROR: Dell OpenManage Server Administrator (OMSA) is not installed\n";
        exit $E_UNKNOWN;
    }
    return;
}
645 | ||
#
# Scans the output of 'omreport -?' for arguments to omreport. This
# accommodates the deprecated "chassis" and "system" options (on
# newer hardware) and detects blade servers.
#
sub check_omreport_options {
    my $help_lines = run_command("$omreport -? 2>&1");

    foreach my $line (@{ $help_lines }) {
        if ($line =~ m/\A servermodule /xms) {
            # "servermodule" exists: use it instead of "system"
            $omopt_system = 'servermodule';
        }
        elsif ($line =~ m/\A mainsystem /xms) {
            # "mainsystem" exists: use it instead of "chassis"
            $omopt_chassis = 'mainsystem';
        }
        elsif ($line =~ m/\A modularenclosure /xms) {
            # "modularenclosure" exists: assume that this is a blade
            $blade = 1;
        }
    }
    return;
}
671 | ||
#
# Read the blacklist option and return a reference to a hash
# containing the blacklisted components: component name => reference
# to an array of ids.
#
# Each --blacklist argument is either a blacklist string or the name
# of a file whose first line is such a string. A string looks like
# "name=id,id/name=id": components are separated by '/', ids by ','.
# Returns an empty hash if nothing is blacklisted or if a blacklist
# file can't be opened (which is also reported).
#
sub get_blacklist {
    my @bl     = ();
    my %parsed = ();

    foreach my $black (@{ $opt{blacklist} }) {
        my $tmp = $black;
        if (-f $black) {
            open my $BL, '<', $black
              or do { report('other', "Couldn't open blacklist file $black: $!", $E_UNKNOWN);
                      return {} };
            $tmp = <$BL>;
            close $BL;
            next if !defined $tmp;  # empty file, nothing to blacklist
            chomp $tmp;
        }
        push @bl, $tmp;
    }

    return {} if !@bl;

    # Parse blacklist string, put in hash
    foreach my $black (@bl) {
        foreach my $c (split m{/}xms, $black) {
            next if $c !~ m/=/xms;
            my ($key, $val) = split /=/xms, $c;
            $val = q{} if !defined $val;  # "name=" gives an empty id list
            $parsed{$key} = [ split /,/xms, $val ];
        }
    }

    return \%parsed;
}
713 | ||
#
# Read the --all, --only and --check options and adjust the hash
# %check, which is a rough list of components to be checked.
#
# Prints an error and exits with status UNKNOWN on conflicting
# options or an unknown --only keyword. With --check, each argument
# is either a "key=value,key=value" string or the name of a file whose
# first line is such a string; if any check other than the esmlog and
# alertlog ends up turned off, $global is set to 0.
#
sub adjust_checks {
    my @cl = ();

    # "--only critical" and "--only warning" select an alert type, not
    # a component. The alternation is grouped so that the anchors
    # apply to both words; without the group any value containing
    # "warning" or starting with "critical" would be accepted.
    my $only_component
      = (defined $opt{only} and $opt{only} !~ m{\A (?:critical|warning) \z}xms) ? 1 : 0;

    # Adjust checking based on the '--all' option
    if ($opt{all}) {
        # Check option usage
        if ($only_component) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--only" options\n};
            exit $E_UNKNOWN;
        }
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--all" and "--check" options\n};
            exit $E_UNKNOWN;
        }

        # set the check hash to check everything
        $check{$_} = 1 for keys %check;

        return;
    }

    # Adjust checking based on the '--only' option
    if ($only_component) {
        # Check option usage
        if (scalar @{ $opt{check} } > 0) {
            print qq{ERROR: Wrong simultaneous usage of the "--only" and "--check" options\n};
            exit $E_UNKNOWN;
        }
        if (! exists $check{$opt{only}} && $opt{only} ne 'chassis') {
            print qq{ERROR: "$opt{only}" is not a known keyword for the "--only" option\n};
            exit $E_UNKNOWN;
        }

        # reset the check hash
        $check{$_} = 0 for keys %check;

        # adjust the check hash
        if ($opt{only} eq 'chassis') {
            $check{$_} = 1 for qw(memory fans power temp cpu voltage
                                  batteries amperage intrusion esmhealth);
        }
        else {
            $check{$opt{only}} = 1;
        }

        return;
    }

    # Adjust checking based on the '--check' option
    foreach my $check (@{ $opt{check} }) {
        my $tmp = $check;
        if (-f $check) {
            open my $CL, '<', $check
              or do { report('other', "Couldn't open check file $check: $!", $E_UNKNOWN);
                      return };
            $tmp = <$CL>;
            close $CL;
            next if !defined $tmp;  # empty file, nothing to adjust
            chomp $tmp;             # keep the newline out of the last value
        }
        push @cl, $tmp;
    }

    return if !@cl;

    # Parse checklist string, put in hash
    foreach my $check (@cl) {
        foreach my $c (split /,/xms, $check) {
            next if $c !~ m/=/xms;
            my ($key, $val) = split /=/xms, $c;
            $check{$key} = $val;
        }
    }

    # Check if we should check global health status
  CHECK_KEY:
    foreach my $key (keys %check) {
        next CHECK_KEY if $key eq 'esmlog';    # not part of global status
        next CHECK_KEY if $key eq 'alertlog';  # not part of global status

        if ($check{$key} == 0) {  # found something with checking turned off
            $global = 0;
            last CHECK_KEY;
        }
    }

    return;
}
809 | ||
#
# Runs omreport with the given parameter string (in "ssv" format) and
# returns a reference to an array of anonymous hashes, one per data
# line, keyed by the fields of the most recent header line.
# Unexpected error lines are reported with status UNKNOWN.
#
sub run_omreport {
    my $command = shift;
    my @output  = ();
    my @keys    = ();

    # Errors that are OK. Some low-end poweredge (and blades) models
    # don't have RAID controllers, intrusion detection sensor, or
    # redundant/instrumented power supplies etc.
    my $ok_errors
      = qr{
            Intrusion\sinformation\sis\snot\sfound\sfor\sthis\ssystem  # No intrusion probe
          | No\sinstrumented\spower\ssupplies\sfound\son\sthis\ssystem # No instrumented PS (blades/low-end)
          | No\scontrollers\sfound                                     # No RAID controller
          | No\sbattery\sprobes\sfound\son\sthis\ssystem               # No battery probes
          | Invalid\scommand:\spwrmonitoring                           # Older OMSAs lack this command(?)
#          | Current\sprobes\snot\sfound                                # No power monitoring capability
      }xms;

    # Errors that are OK on blade servers
    my $ok_blade_errors
      = qr{
              No\sfan\sprobes\sfound\son\sthis\ssystem                 # No fan probes
      }xms;

    # Run omreport and fetch output
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");
    return [] if !defined $rawtext;

    # Workaround for Openmanage BUG introduced in OMSA 5.5.0
    if ($command eq 'storage controller') {
        $rawtext =~ s{\n;}{;}gxms;
    }

    # Openmanage sometimes puts a linebreak between "Error" and the
    # actual error text
    $rawtext =~ s{^Error\s*\n}{Error: }xms;

    # Parse output line by line, store in array
  LINE:
    foreach my $line (split m{\n}xms, $rawtext) {
        if ($line =~ m{\AError}xms) {
            next LINE if $line =~ m{$ok_errors}xms;
            next LINE if ($blade and $line =~ m{$ok_blade_errors}xms);
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than 3 fields
        next LINE if $line !~ m/(.*?;){2}/xms;

        my @vals = split /;/xms, $line;
        if ($vals[0] =~ m/\A (Index|ID|Severity|Processor|Current\sSpeed) \z/xms) {
            # header line: these are the field names from now on
            @keys = @vals;
        }
        else {
            # data line: pair the field names with the values
            my %row = ();
            @row{@keys} = @vals[0 .. $#keys];
            push @output, \%row;
        }
    }

    # Finally, return the collected information
    return \@output;
}
873 | ||
874 | ||
#
# Tells whether a component is blacklisted. Returns 1 if it is, 0
# otherwise. Takes two arguments:
#   arg1: component name
#   arg2: component id or index
# A blacklist entry of "all" (in any letter case) covers every id of
# that component. An undefined id is never blacklisted.
#
sub blacklisted {
    my ($name, $id) = @_;

    return 0 if !defined $blacklist{$name};

    my $matches = grep { defined $id and ($_ eq $id or uc($_) eq 'ALL') }
                       @{ $blacklist{$name} };

    return $matches ? 1 : 0;
}
896 | ||
# Converts the NexusID from SNMP to our version: a leading backslash
# is removed, and every remaining backslash is replaced by a colon.
# Takes the SNMP NexusID as its only argument and returns the
# converted string.
sub convert_nexus {
    my ($nexus) = @_;

    # $_ is aliased to $nexus, so both substitutions edit it in place
    for ($nexus) {
        s{\A \\}{}xms;
        s{\\}{:}gxms;
    }

    return $nexus;
}
904 | ||
# Sets custom temperature thresholds based on user supplied options.
#   arg1: type of threshold, 'w' selects the values in $opt{warning},
#         anything else selects the values in $opt{critical}
# Each value is either a threshold string, or the name of an existing
# file whose first line holds the threshold string. A threshold string
# is a comma separated list of items on the form "key=max" or
# "key=max/min". Items without an equal sign, or without a value after
# the equal sign, are ignored.
# Returns a reference to a hash on the form $thres{key}{max} and
# $thres{key}{min}. If a threshold file can't be opened, the problem
# is reported and a reference to an empty hash is returned.
sub custom_temperature_thresholds {
    my $type   = shift; # type of threshold, either w (warning) or c (critical)
    my %thres  = ();    # will contain the thresholds
    my @limits = ();    # holds the input

    my $input = $type eq 'w' ? $opt{warning} : $opt{critical};
    my @opt   = defined $input ? @{ $input } : ();

    foreach my $t (@opt) {
        next if !defined $t;
        my $tmp = $t;
        if (-f $t) {
            my $F;
            if (!open $F, '<', $t) {
                # Give up no matter what report() returns
                report('other', "Couldn't open temperature threshold file $t: $!",
                       $E_UNKNOWN);
                return {};
            }
            $tmp = <$F>;
            close $F;

            # An empty file holds no thresholds
            next if !defined $tmp;

            # Don't let the line ending become part of the last value
            $tmp =~ s{\s+\z}{}xms;
        }
        push @limits, $tmp;
    }

    # Parse checklist string, put in hash
    foreach my $th (@limits) {
        foreach my $t (split m{,}xms, $th) {
            next if $t !~ m{=}xms;
            my ($key, $val) = split m{=}xms, $t;
            next if !defined $val;   # "key=" gives nothing to compare with
            if ($val =~ m{/}xms) {
                my ($max, $min) = split m{/}xms, $val;
                $thres{$key}{max} = $max;
                $thres{$key}{min} = $min;
            }
            else {
                $thres{$key}{max} = $val;
            }
        }
    }

    return \%thres;
}
949 | ||
950 | ||
# Gets the output from SNMP result according to the OIDs checked.
#   arg1: reference to the SNMP result hash, keyed on full OID
#   arg2: reference to a hash that maps OIDs (without the trailing
#         number) to field names
# The last number of each OID in the result selects the array element
# (number minus one), the rest of the OID selects the field name.
# Result entries whose OID is not found in the map are left out.
# Returns a reference to an array of hashes.
sub get_snmp_output {
    my ($result, $oidref) = @_;
    my @records = ();

    foreach my $oid (keys %{ $result }) {
        my @parts = split /\./xms, $oid;

        # Trailing number, turned into a zero-based array index
        my $index = pop @parts;
        --$index;

        my $column = join q{.}, @parts;
        next if !exists $oidref->{$column};

        $records[$index]{ $oidref->{$column} } = $result->{$oid};
    }

    return \@records;
}
967 | ||
968 | ||
# Map the controller or other item in-place. Every hash in the list
# that doesn't already have the key gets the key set to the given
# value. Hashes that have the key are left untouched.
#   arg1: key
#   arg2: value
#   arg3: reference to an array of hash references
# Returns nothing.
sub map_item {
    my ($key, $val, $list) = @_;

    for my $item (grep { !exists $_->{$key} } @{ $list }) {
        $item->{$key} = $val;
    }

    return;
}
980 | ||
# Return the URL for official Dell documentation for a specific
# PowerEdge server. Takes the model name as argument, e.g.
# "PowerEdge R710".
sub documentation_url {
    my $model = shift;

    # create model short form, e.g. "r710"
    if ($model =~ m{\A PowerEdge \s (.+?) \z}xms) {
        $model = lc $1;
    }

    # special case for blades (e.g. M600, M710), they have common
    # documentation
    if ($model =~ m{\A m\d+ \z}xms) {
        $model = 'm';
    }

    return join q{}, 'http://support.dell.com/support/edocs/systems/pe', $model, '/';
}
995 | ||
# Return the URL for warranty information for a server with a given
# serial number (servicetag). The country code in $opt{htmlinfo}
# selects the support site; a country code that is not listed gives
# the default (global) site.
sub warranty_url {
    my $tag = shift;

    # Dell support sites for different parts of the world
    my $emea = 'http://support.euro.dell.com/support/topics/topic.aspx/emea/shared/support/my_systems_info/';
    my $ap   = 'http://supportapj.dell.com/support/topics/topic.aspx/ap/shared/support/my_systems_info/en/details?';
    my $glob = 'http://support.dell.com/support/topics/global.aspx/support/my_systems_info/details?';

    # EMEA: country code => [ language directory, language parameter ]
    my %emea_lang
      = (
         'at' => [ 'de', 'de' ],  # Austria
         'be' => [ 'nl', 'nl' ],  # Belgium
         'cz' => [ 'cs', 'cs' ],  # Czech Republic
         'de' => [ 'de', 'de' ],  # Germany
         'dk' => [ 'da', 'da' ],  # Denmark
         'es' => [ 'es', 'es' ],  # Spain
         'fi' => [ 'fi', 'fi' ],  # Finland
         'fr' => [ 'fr', 'fr' ],  # France
         'gr' => [ 'en', 'el' ],  # Greece
         'it' => [ 'it', 'it' ],  # Italy
         'il' => [ 'en', 'en' ],  # Israel
         'me' => [ 'en', 'en' ],  # Middle East
         'no' => [ 'no', 'no' ],  # Norway
         'nl' => [ 'nl', 'nl' ],  # The Netherlands
         'pl' => [ 'pl', 'pl' ],  # Poland
         'pt' => [ 'en', 'pt' ],  # Portugal
         'ru' => [ 'ru', 'ru' ],  # Russia
         'se' => [ 'sv', 'sv' ],  # Sweden
         'uk' => [ 'en', 'en' ],  # United Kingdom
         'za' => [ 'en', 'en' ],  # South Africa
        );

    # America: country code => language parameter
    my %glob_lang
      = (
         'br' => 'pt',  # Brazil
         'ca' => 'en',  # Canada
         'mx' => 'es',  # Mexico
         'us' => 'en',  # USA
        );

    # Asia/Pacific: country code => language parameter
    my %ap_lang
      = (
         'au' => 'en',  # Australia
         'cn' => 'zh',  # China
         'in' => 'en',  # India
        );

    # warranty URLs for different country codes, starting with the
    # default fallback
    my %url = ( 'XX' => $glob . 'ServiceTag=' );

    foreach my $cc (keys %emea_lang) {
        my ($dir, $lang) = @{ $emea_lang{$cc} };
        $url{$cc} = $emea . $dir . '/details?c=' . $cc . '&l=' . $lang . '&ServiceTag=';
    }
    foreach my $cc (keys %glob_lang) {
        $url{$cc} = $glob . 'c=' . $cc . '&l=' . $glob_lang{$cc} . '&ServiceTag=';
    }
    foreach my $cc (keys %ap_lang) {
        $url{$cc} = $ap . 'c=' . $cc . '&l=' . $ap_lang{$cc} . '&ServiceTag=';
    }

    my $country = exists $url{$opt{htmlinfo}} ? $opt{htmlinfo} : 'XX';
    return $url{$country} . $tag;
}
1053 | ||
1054 | ||
1055 | ||
1056 | #--------------------------------------------------------------------- | |
1057 | # Check functions | |
1058 | #--------------------------------------------------------------------- | |
1059 | ||
#-----------------------------------------
# Check global health status
#-----------------------------------------
# Returns the global health status, translated via %status2nagios.
# The status is $E_OK if the omreport output holds no line for the
# main system chassis. With SNMP the global system status (storage and
# chassis) is fetched from the agent; if the request fails, the SNMP
# error is printed and the plugin exits with $E_UNKNOWN. Without SNMP
# the status of the main system chassis is taken from omreport.
sub check_global {
    my $health = $E_OK;

    if ($snmp) {
        #
        # Checks global status, i.e. both storage and chassis
        #
        my $systemStateGlobalSystemStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.2.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateGlobalSystemStatus]);
        if (!defined $result) {
            # Ask the session for the error of the failed request, and
            # close the session before leaving, like the other SNMP
            # checks do
            printf "SNMP ERROR [global]: %s\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }
        $health = $status2nagios{$snmp_status{$result->{$systemStateGlobalSystemStatus}}};
    }
    else {
        #
        # NB! This does not check storage, only chassis...
        #
        foreach my $line (@{ run_command("$omreport $omopt_system -fmt ssv") }) {
            next if $line !~ m/;/xms;
            next if $line =~ m/\A SEVERITY;COMPONENT/xms;

            # The first field of the chassis line is the status
            if ($line =~ m/\A (.+?);Main\sSystem(\sChassis)? /xms) {
                $health = $status2nagios{$1};
                last;
            }
        }
    }

    # Return the status
    return $health;
}
1095 | ||
1096 | ||
#-----------------------------------------
# STORAGE: Check controllers
#-----------------------------------------
# Collects the storage controllers, either via SNMP or via omreport.
# For each controller the id is added to @controllers and the name,
# driver, firmware and storport driver versions are stored in
# %sysinfo. Unless the controller is blacklisted ('ctrl'), its status
# is reported. A defined minimum required firmware, driver or storport
# driver version is reported as an out of date warning, unless
# blacklisted with 'ctrl_fw', 'ctrl_driver' or 'ctrl_stdr'.
sub check_controllers {
    my $id       = undef;
    my $nexus    = undef;
    my $name     = undef;
    my $state    = undef;
    my $status   = undef;
    my $minfw    = undef;
    my $mindr    = undef;
    my $firmware = undef;
    my $driver   = undef;
    my $minstdr  = undef;  # Minimum required Storport driver version
    my $stdr     = undef;  # Storport driver version
    my @output   = ();

    if ($snmp) {
        my %ctrl_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.1'  => 'controllerNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.2'  => 'controllerName',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.5'  => 'controllerState',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.8'  => 'controllerFWVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.38' => 'controllerComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.39' => 'controllerNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.41' => 'controllerDriverVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.44' => 'controllerMinFWVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.45' => 'controllerMinDriverVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.55' => 'FIXME_StorportDriverVersion',
             '1.3.6.1.4.1.674.10893.1.20.130.1.1.56' => 'FIXME_StorportMinDriverVersion',
            );

        # We use get_table() here for the odd case where a server has
        # two or more controllers, and where some OIDs are missing on
        # one of the controllers.
        my $controllerTable = '1.3.6.1.4.1.674.10893.1.20.130.1';
        my $result = $snmp_session->get_table(-baseoid => $controllerTable);

        # No controllers is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%ctrl_oid) };
    }
    else {
        @output = @{ run_omreport('storage controller') };
    }

    my %ctrl_state
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

    # Fetches a field from an omreport record. Gives undef if the field
    # is missing, has no value, or is reported as 'Not Applicable'
    my $omreport_value = sub {
        my ($record, $field) = @_;
        return undef if !exists $record->{$field};
        return undef if !defined $record->{$field};
        return undef if $record->{$field} eq 'Not Applicable';
        return $record->{$field};
    };

  CTRL:
    foreach my $out (@output) {
        if ($snmp) {
            $id       = $out->{controllerNumber} - 1;
            $name     = $out->{controllerName};
            $state    = $ctrl_state{$out->{controllerState}};
            $status   = $snmp_status{$out->{controllerComponentStatus}};
            $minfw    = exists $out->{controllerMinFWVersion}
              ? $out->{controllerMinFWVersion} : undef;
            $mindr    = exists $out->{controllerMinDriverVersion}
              ? $out->{controllerMinDriverVersion} : undef;
            $firmware = exists $out->{controllerFWVersion}
              ? $out->{controllerFWVersion} : 'N/A';
            $driver   = exists $out->{controllerDriverVersion}
              ? $out->{controllerDriverVersion} : 'N/A';
            $minstdr  = exists $out->{'FIXME_StorportMinDriverVersion'}
              ? $out->{FIXME_StorportMinDriverVersion} : undef;
            $stdr     = exists $out->{FIXME_StorportDriverVersion}
              ? $out->{FIXME_StorportDriverVersion} : undef;
            $nexus    = convert_nexus($out->{controllerNexusID});
        }
        else {
            $id       = $out->{ID};
            $name     = $out->{Name};
            $state    = $out->{State};
            $status   = $out->{Status};

            # All version fields are fetched the same way, so that a
            # field missing from the omreport output is handled like
            # a missing OID in the SNMP case
            $minfw    = $omreport_value->($out, 'Minimum Required Firmware Version');
            $mindr    = $omreport_value->($out, 'Minimum Required Driver Version');
            $firmware = $omreport_value->($out, 'Firmware Version');
            $firmware = 'N/A' if !defined $firmware;
            $driver   = $omreport_value->($out, 'Driver Version');
            $driver   = 'N/A' if !defined $driver;
            $minstdr  = $omreport_value->($out, 'Minimum Required Storport Driver Version');
            $stdr     = $omreport_value->($out, 'Storport Driver Version');
            $nexus    = $id;
        }

        $name =~ s{\s+\z}{}xms; # remove trailing whitespace
        push @controllers, $id;

        # Collecting some storage info
        $sysinfo{'controller'}{$id}{'id'}       = $nexus;
        $sysinfo{'controller'}{$id}{'name'}     = $name;
        $sysinfo{'controller'}{$id}{'driver'}   = $driver;
        $sysinfo{'controller'}{$id}{'firmware'} = $firmware;
        $sysinfo{'controller'}{$id}{'storport'} = $stdr;

        next CTRL if blacklisted('ctrl', $nexus);

        # Special case: old firmware
        if (!blacklisted('ctrl_fw', $id) && defined $minfw) {
            chomp $firmware;
            my $msg = sprintf q{Controller %d [%s]: Firmware '%s' is out of date},
              $id, $name, $firmware;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: old driver
        if (!blacklisted('ctrl_driver', $id) && defined $mindr) {
            chomp $driver;
            my $msg = sprintf q{Controller %d [%s]: Driver '%s' is out of date},
              $id, $name, $driver;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: old storport driver
        if (!blacklisted('ctrl_stdr', $id) && defined $minstdr) {
            # The current version may be unavailable even if a minimum
            # version is given
            my $current = defined $stdr ? $stdr : 'N/A';
            chomp $current;
            my $msg = sprintf q{Controller %d [%s]: Storport driver '%s' is out of date},
              $id, $name, $current;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Ok. A non-critical status is also accepted here when a
        # minimum required version is given, i.e. when the out of date
        # special cases above apply
        if ($status eq 'Ok' or ($status eq 'Non-Critical'
                                and (defined $minfw or defined $mindr or defined $minstdr))) {
            my $msg = sprintf 'Controller %d [%s] is %s',
              $id, $name, $state;
            report('storage', $msg, $E_OK, $nexus);
        }
        # Default
        else {
            my $msg = sprintf 'Controller %d [%s] needs attention: %s',
              $id, $name, $state;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
    }
    return;
}
1247 | ||
1248 | ||
#-----------------------------------------
# STORAGE: Check physical drives
#-----------------------------------------
# Collects the physical disks, either via SNMP or via omreport (one
# omreport run per controller in @controllers), and reports the status
# of each disk that isn't blacklisted ('pdisk'). Disks that are
# reported are counted in $count{pdisk}. Does nothing if no
# controllers were found. Exits with $E_UNKNOWN if the SNMP request
# fails.
sub check_physical_disks {
    return if $#controllers == -1;

    my @disks = ();

    if ($snmp) {
        my %pdisk_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.1'  => 'arrayDiskNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.2'  => 'arrayDiskName',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.3'  => 'arrayDiskVendor',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.4'  => 'arrayDiskState',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.6'  => 'arrayDiskProductID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.9'  => 'arrayDiskEnclosureID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.10' => 'arrayDiskChannel',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.11' => 'arrayDiskLengthInMB',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.15' => 'arrayDiskTargetID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.16' => 'arrayDiskLunID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.24' => 'arrayDiskComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.26' => 'arrayDiskNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.4.1.31' => 'arrayDiskSmartAlertIndication',
             '1.3.6.1.4.1.674.10893.1.20.130.5.1.5'  => 'arrayDiskEnclosureConnectionEnclosureNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.5.1.7'  => 'arrayDiskEnclosureConnectionControllerNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %pdisk_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [storage / pdisk]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @disks = @{ get_snmp_output($result, \%pdisk_oid) };
    }
    else {
        # Tag the disks with the controller they were listed for
        foreach my $c (@controllers) {
            push @disks, @{ run_omreport("storage pdisk controller=$c") };
            map_item('ctrl', $c, \@disks);
        }
    }

    my %pdisk_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         7  => 'Recovering',
         11 => 'Removed',
         15 => 'Resynching',
         24 => 'Rebuilding',
         25 => 'No Media',
         26 => 'Formatting',
         28 => 'Diagnostics',
         34 => 'Predictive failure',
         35 => 'Initializing',
         39 => 'Foreign',
         40 => 'Clear',
         41 => 'Unsupported',
         53 => 'Incompatible',
        );

    # Quick fixes for disk sizes that come out one GB off
    my %capacity_fix
      = (
         '449GB' => '450GB',  # quick fix for 450GB disks
         '299GB' => '300GB',  # quick fix for 300GB disks
         '147GB' => '146GB',  # quick fix for 146GB disks
        );

    # Check physical disks on each of the controllers
  PDISK:
    foreach my $disk (@disks) {
        my ($id, $nexus, $name, $state, $status, $fpred, $progr, $ctrl);
        my ($vendor, $product);  # disk vendor and product ID
        my $capacity;            # disk length (size) in bytes

        if ($snmp) {
            $name = $disk->{arrayDiskName};

            # The enclosure is part of the id only if the disk name
            # ends in a three-part address
            my @address = ($disk->{arrayDiskChannel});
            if ($name =~ m{.*\d+:\d+:\d+\z}xms) {
                push @address, $disk->{arrayDiskEnclosureID};
            }
            push @address, $disk->{arrayDiskTargetID};
            $id = join q{:}, @address;

            $state  = $pdisk_state{$disk->{arrayDiskState}};
            $status = $snmp_status{$disk->{arrayDiskComponentStatus}};
            $fpred  = $disk->{arrayDiskSmartAlertIndication} == 2 ? 1 : 0;
            $progr  = q{};
            $ctrl   = -1;
            if (exists $disk->{arrayDiskEnclosureConnectionControllerNumber}) {
                $ctrl = $disk->{arrayDiskEnclosureConnectionControllerNumber} - 1;
            }
            $nexus    = convert_nexus($disk->{arrayDiskNexusID});
            $vendor   = $disk->{arrayDiskVendor};
            $product  = $disk->{arrayDiskProductID};
            $capacity = $disk->{arrayDiskLengthInMB} * 1024**2;
        }
        else {
            $id       = $disk->{'ID'};
            $name     = $disk->{'Name'};
            $state    = $disk->{'State'};
            $status   = $disk->{'Status'};
            $fpred    = lc($disk->{'Failure Predicted'}) eq 'yes' ? 1 : 0;
            $progr    = ' [' . $disk->{'Progress'} . ']';
            $ctrl     = $disk->{'ctrl'};
            $nexus    = join q{:}, $disk->{ctrl}, $id;
            $vendor   = $disk->{'Vendor ID'};
            $product  = $disk->{'Product ID'};
            $capacity = $disk->{'Capacity'};

            # Keep only the number of bytes given in parentheses
            $capacity =~ s{\A .*? \((\d+) \s bytes\) \z}{$1}xms;
        }

        next PDISK if blacklisted('pdisk', $nexus);
        $count{pdisk}++;

        # remove trailing whitespace
        foreach my $text ($vendor, $product) {
            $text =~ s{\s+\z}{}xms;
        }

        # Calculate human readable capacity
        if (ceil($capacity / 1000**3) >= 1000) {
            $capacity = sprintf '%.1fTB', ($capacity / 1000**4);
        }
        else {
            $capacity = sprintf '%.0fGB', ($capacity / 1000**3);
        }
        $capacity = $capacity_fix{$capacity} if exists $capacity_fix{$capacity};

        # Keep the first letter of the vendor name, lowercase the rest
        $vendor = substr($vendor, 0, 1) . lc substr($vendor, 1, length $vendor);

        # Remove unnecessary trademark rubbish from vendor name
        $vendor =~ s{\(tm\)\z}{}xms;

        my $msg   = undef;
        my $level = undef;

        # Special case: Failure predicted
        if ($status eq 'Non-Critical' and $fpred) {
            $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: Failure Predicted',
              $name, $vendor, $product, $capacity, $ctrl;
            $level = $E_WARNING;
        }
        # Special case: Rebuilding
        elsif ($state eq 'Rebuilding') {
            $msg = sprintf '%s [%s] on ctrl %d is %s%s',
              $name, $capacity, $ctrl, $state, $progr;
            $level = $E_WARNING;
        }
        # Default
        elsif ($status ne 'Ok') {
            $msg = sprintf '%s [%s %s, %s] on ctrl %d needs attention: %s',
              $name, $vendor, $product, $capacity, $ctrl, $state;
            $level = $status2nagios{$status};
        }
        # Ok
        else {
            $msg = sprintf '%s [%s] on ctrl %d is %s',
              $name, $capacity, $ctrl, $state;
            $level = $E_OK;
        }

        report('storage', $msg, $level, $nexus);
    }
    return;
}
1413 | ||
1414 | ||
#-----------------------------------------
# STORAGE: Check logical drives
#-----------------------------------------
# Collects the logical (virtual) drives, either via SNMP or via
# omreport (one omreport run per controller in @controllers), and
# reports the status of each drive that isn't blacklisted ('vdisk').
# Drives that are reported are counted in $count{vdisk}. Does nothing
# if no controllers were found.
sub check_virtual_disks {
    return if $#controllers == -1;

    my @vdisks = ();

    if ($snmp) {
        my %vdisk_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.3'  => 'virtualDiskDeviceName',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.4'  => 'virtualDiskState',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.6'  => 'virtualDiskLengthInMB',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.13' => 'virtualDiskLayout',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.17' => 'virtualDiskTargetID',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.20' => 'virtualDiskComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.140.1.1.21' => 'virtualDiskNexusID',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %vdisk_oid]);

        # No logical drives is OK
        return if !defined $result;

        @vdisks = @{ get_snmp_output($result, \%vdisk_oid) };
    }
    else {
        # Tag the drives with the controller they were listed for
        foreach my $c (@controllers) {
            push @vdisks, @{ run_omreport("storage vdisk controller=$c") };
            map_item('ctrl', $c, \@vdisks);
        }
    }

    my %vdisk_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         15 => 'Resynching',
         16 => 'Regenerating',
         24 => 'Rebuilding',
         26 => 'Formatting',
         32 => 'Reconstructing',
         35 => 'Initializing',
         36 => 'Background Initialization',
         38 => 'Resynching Paused',
         52 => 'Permanently Degraded',
         54 => 'Degraded Redundancy',
        );

    my %vdisk_layout
      = (
         1  => 'Concatenated',
         2  => 'RAID-0',
         3  => 'RAID-1',
         7  => 'RAID-5',
         8  => 'RAID-6',
         10 => 'RAID-10',
         12 => 'RAID-50',
         19 => 'Concatenated RAID 1',
         24 => 'RAID-60',
        );

    # Check virtual disks on each of the controllers
  VDISK:
    foreach my $vdisk (@vdisks) {
        my ($id, $nexus, $dev, $state, $status, $layout, $size, $progr, $ctrl);

        if ($snmp) {
            $id     = $vdisk->{virtualDiskTargetID};
            $dev    = $vdisk->{virtualDiskDeviceName};
            $state  = $vdisk_state{$vdisk->{virtualDiskState}};
            $status = $snmp_status{$vdisk->{virtualDiskComponentStatus}};
            $layout = $vdisk_layout{$vdisk->{virtualDiskLayout}};
            $size   = sprintf '%.2f GB', $vdisk->{virtualDiskLengthInMB} / 1024;
            $progr  = q{};  # can't get this from SNMP(?)
            $nexus  = convert_nexus($vdisk->{virtualDiskNexusID});

            # We use the nexus id to get the controller id
            ($ctrl = $nexus) =~ s{\A (\d+):\d+ \z}{$1}xms;
        }
        else {
            $id     = $vdisk->{ID};
            $dev    = $vdisk->{'Device Name'};
            $state  = $vdisk->{State};
            $status = $vdisk->{Status};
            $layout = $vdisk->{Layout};
            $size   = $vdisk->{Size};
            $progr  = ' [' . $vdisk->{Progress} . ']';
            $size   =~ s{\A (.*GB).* \z}{$1}xms;
            $nexus  = join q{:}, $vdisk->{ctrl}, $id;
            $ctrl   = $vdisk->{ctrl};
        }

        next VDISK if blacklisted('vdisk', $nexus);
        $count{vdisk}++;

        # The device name is undefined sometimes
        if (!defined $dev) {
            $dev = q{};
        }

        # Values shared by all the messages below
        my @details = ($id, $dev, $layout, $size, $ctrl, $state);
        my $msg     = undef;
        my $level   = undef;

        # Special case: Regenerating
        if ($state eq 'Regenerating') {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s%s},
              @details, $progr;
            $level = $E_WARNING;
        }
        # Default
        elsif ($status ne 'Ok') {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d needs attention: %s},
              @details;
            $level = $status2nagios{$status};
        }
        # Ok
        else {
            $msg = sprintf q{Logical drive %d '%s' [%s, %s] on ctrl %d is %s},
              @details;
            $level = $E_OK;
        }

        report('storage', $msg, $level, $nexus);
    }
    return;
}
1546 | ||
1547 | ||
#-----------------------------------------
# STORAGE: Check cache batteries
#-----------------------------------------
# Collects the controller cache batteries, either via SNMP or via
# omreport (one omreport run per controller in @controllers), and
# reports the status of each battery that isn't blacklisted ('bat').
# The states 'Charging', 'Learning', 'Power Low', and 'Degraded' with
# status 'Non-Critical', are reported as warnings, and are skipped if
# blacklisted with 'bat_charge'. A failed predicted capacity while
# charging, or a failed learn state while learning, is critical. Does
# nothing if no controllers were found.
sub check_cache_battery {
    return if $#controllers == -1;

    my $id     = undef;
    my $nexus  = undef;
    my $state  = undef;
    my $status = undef;
    my $ctrl   = undef;
    my $learn  = undef; # learn state
    my $pred   = undef; # battery's ability to be charged
    my @output = ();

    if ($snmp) {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.4'  => 'batteryState',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.6'  => 'batteryComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.9'  => 'batteryNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.10' => 'batteryPredictedCapacity',
             '1.3.6.1.4.1.674.10893.1.20.130.15.1.12' => 'batteryLearnState',
             '1.3.6.1.4.1.674.10893.1.20.130.16.1.5'  => 'batteryConnectionControllerNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No cache battery is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%bat_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage battery controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    my %bat_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         6  => 'Degraded',
         7  => 'Reconditioning',
         9  => 'High',
         10 => 'Power Low',
         12 => 'Charging',
         21 => 'Missing',
         36 => 'Learning',
        );

    # Specifies the learn state activity of the battery
    my %bat_learn_state
      = (
         1  => 'Failed',
         2  => 'Active',
         4  => 'Timed out',
         8  => 'Requested',
         16 => 'Idle',
        );

    # This property displays the battery's ability to be charged
    my %bat_pred_cap
      = (
         1 => 'Failed',  # The battery cannot be charged and needs to be replaced
         2 => 'Ready',   # The battery can be charged to full capacity
         4 => 'Unknown', # The battery is completing a Learn cycle. The charge capacity of the
                         # battery cannot be determined until the Learn cycle is complete
        );

    # Check battery on each of the controllers
  BATTERY:
    foreach my $out (@output) {
        if ($snmp) {
            $state  = $bat_state{$out->{batteryState}};
            $status = $snmp_status{$out->{batteryComponentStatus}};
            $learn  = exists $out->{batteryLearnState}
              ? $bat_learn_state{$out->{batteryLearnState}} : undef;
            $pred   = exists $out->{batteryPredictedCapacity}
              ? $bat_pred_cap{$out->{batteryPredictedCapacity}} : undef;
            $ctrl   = $out->{batteryConnectionControllerNumber} - 1;
            $nexus  = convert_nexus($out->{batteryNexusID});
            $id     = $nexus;
            $id     =~ s{\A \d+:(\d+) \z}{$1}xms;
        }
        else {
            $id     = $out->{'ID'};
            $state  = $out->{'State'};
            $status = $out->{'Status'};
            $learn  = $out->{'Learn State'};
            $pred   = $out->{'Predicted Capacity Status'};
            $ctrl   = $out->{'ctrl'};
            $nexus  = join q{:}, $out->{ctrl}, $id;
        }

        # The learn state and the predicted capacity are not always
        # available. They are compared and printed below, so make
        # sure they hold a printable value
        $learn = 'Unknown' if !defined $learn;
        $pred  = 'Unknown' if !defined $pred;

        next BATTERY if blacklisted('bat', $nexus);

        # Special case: Charging
        if ($state eq 'Charging') {
            if ($pred eq 'Failed') {
                my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [replace battery]',
                  $id, $ctrl, $state, $pred;
                report('storage', $msg, $E_CRITICAL, $nexus);
            }
            else {
                next BATTERY if blacklisted('bat_charge', $nexus);
                my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
                  $id, $ctrl, $state, $pred;
                report('storage', $msg, $E_WARNING, $nexus);
            }
        }
        # Special case: Learning (battery learns its capacity)
        elsif ($state eq 'Learning') {
            if ($learn eq 'Failed') {
                my $msg = sprintf 'Cache battery %d in controller %d is %s (%s)',
                  $id, $ctrl, $state, $learn;
                report('storage', $msg, $E_CRITICAL, $nexus);
            }
            else {
                next BATTERY if blacklisted('bat_charge', $nexus);
                my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
                  $id, $ctrl, $state, $learn;
                report('storage', $msg, $E_WARNING, $nexus);
            }
        }
        # Special case: Power Low (first part of recharge cycle)
        elsif ($state eq 'Power Low') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s [probably harmless]',
              $id, $ctrl, $state;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Special case: Degraded and Non-Critical (usually part of recharge cycle)
        elsif ($state eq 'Degraded' && $status eq 'Non-Critical') {
            next BATTERY if blacklisted('bat_charge', $nexus);
            my $msg = sprintf 'Cache battery %d in controller %d is %s (%s) [probably harmless]',
              $id, $ctrl, $state, $status;
            report('storage', $msg, $E_WARNING, $nexus);
        }
        # Default
        elsif ($status ne 'Ok') {
            my $msg = sprintf 'Cache battery %d in controller %d needs attention: %s (%s)',
              $id, $ctrl, $state, $status;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf 'Cache battery %d in controller %d is %s',
              $id, $ctrl, $state;
            report('storage', $msg, $E_OK, $nexus);
        }
    }
    return;
}
1704 | ||
1705 | ||
1706 | #----------------------------------------- | |
1707 | # STORAGE: Check connectors (channels) | |
1708 | #----------------------------------------- | |
sub check_connectors {
    # Check every connector (channel) on the storage controllers and
    # pass one message per connector to report('storage', ...).
    #
    # Data is fetched via SNMP when $snmp is set, otherwise by running
    # 'omreport storage connector' for each entry in @controllers.
    # Takes no arguments and returns nothing. If the SNMP query yields
    # no result an error is printed and the plugin exits $E_UNKNOWN.
    return if $#controllers == -1;

    my $id     = undef;
    my $nexus  = undef;
    my $name   = undef;
    my $state  = undef;
    my $status = undef;
    my $type   = undef;
    my $ctrl   = undef;
    my @output = ();

    if ($snmp) {
        my %conn_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.1'  => 'channelNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.2'  => 'channelName',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.3'  => 'channelState',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.8'  => 'channelComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.9'  => 'channelNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.2.1.11' => 'channelBusType',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %conn_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [storage / channel]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%conn_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage connector controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Translation of the numeric SNMP channelState values
    my %conn_state
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

    # Translation of the numeric SNMP channelBusType values
    my %conn_bustype
      = (
         1 => 'SCSI',
         2 => 'IDE',
         3 => 'Fibre Channel',
         4 => 'SSA',
         6 => 'USB',
         7 => 'SATA',
         8 => 'SAS',
        );

    # Check connectors on each of the controllers
    CHANNEL:
    foreach my $out (@output) {
        if ($snmp) {
            my $state_code = $out->{channelState};
            my $type_code  = $out->{channelBusType};

            $id     = $out->{channelNumber} - 1;
            $name   = $out->{channelName};
            # A code that is absent from the row or from the lookup
            # table must not leave an undefined value for sprintf
            # below, so fall back to 'Unknown'.
            $state  = (defined $state_code && exists $conn_state{$state_code})
                    ? $conn_state{$state_code} : 'Unknown';
            $status = $snmp_status{$out->{channelComponentStatus}};
            $type   = (defined $type_code && exists $conn_bustype{$type_code})
                    ? $conn_bustype{$type_code} : 'Unknown';
            $nexus  = convert_nexus($out->{channelNexusID});
            # The controller number is the first component of the
            # nexus; anchor the match so that only that leading
            # component is kept.
            $ctrl   = $nexus;
            $ctrl   =~ s{\A (\d+):.* \z}{$1}xms;
        }
        else {
            $id     = $out->{'ID'};
            $name   = $out->{'Name'};
            $state  = $out->{'State'};
            $status = $out->{'Status'};
            $type   = $out->{'Connector Type'};
            $ctrl   = $out->{ctrl};
            $nexus  = join q{:}, $out->{ctrl}, $id;
        }

        next CHANNEL if blacklisted('conn', $nexus);

        my $msg = sprintf '%s [%s] on controller %d is %s',
          $name, $type, $ctrl, $state;
        report('storage', $msg, $status2nagios{$status}, $nexus);
    }
    return;
}
1800 | ||
1801 | ||
1802 | #----------------------------------------- | |
1803 | # STORAGE: Check enclosures | |
1804 | #----------------------------------------- | |
sub check_enclosures {
    # Check every storage enclosure and pass one message per enclosure
    # to report('storage', ...).
    #
    # Data is fetched via SNMP when $snmp is set, otherwise by running
    # 'omreport storage enclosure' for each entry in @controllers.
    # Side effects: appends one record (id, ctrl, name) per enclosure
    # to the global @enclosures and fills $sysinfo{enclosure}{<nexus>}.
    # Takes no arguments and returns nothing.
    my $id       = undef;
    my $nexus    = undef;
    my $name     = undef;
    my $state    = undef;
    my $status   = undef;
    my $firmware = undef;
    my $ctrl     = undef;
    my @output   = ();

    if ($snmp) {
        my %encl_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.1'  => 'enclosureNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.2'  => 'enclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.4'  => 'enclosureState',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.19' => 'enclosureChannelNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.24' => 'enclosureComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.25' => 'enclosureNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.3.1.26' => 'enclosureFirmwareVersion',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %encl_oid]);

        # No enclosures is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%encl_oid) };
    }
    else {
        foreach my $c (@controllers) {
            push @output, @{ run_omreport("storage enclosure controller=$c") };
            map_item('ctrl', $c, \@output);
        }
    }

    # Translation of the numeric SNMP enclosureState values
    my %encl_state
      = (
         0 => 'Unknown',
         1 => 'Ready',
         2 => 'Failed',
         3 => 'Online',
         4 => 'Offline',
         6 => 'Degraded',
        );

    ENCLOSURE:
    foreach my $out (@output) {
        if ($snmp) {
            $id       = $out->{'enclosureNumber'} - 1;
            $name     = $out->{'enclosureName'};
            $state    = $encl_state{$out->{'enclosureState'}};
            $status   = $snmp_status{$out->{'enclosureComponentStatus'}};
            $firmware = exists $out->{enclosureFirmwareVersion}
              ? $out->{enclosureFirmwareVersion} : 'N/A';
            $nexus    = convert_nexus($out->{enclosureNexusID});
            # The controller number is the first component of the nexus
            $ctrl     = $nexus;
            $ctrl     =~ s{\A (\d+):.* \z}{$1}xms;
        }
        else {
            $id       = $out->{ID};
            $name     = $out->{Name};
            $state    = $out->{State};
            $status   = $out->{Status};
            # A missing firmware field is reported the same way as
            # omreport's own 'Not Applicable'.
            $firmware = (defined $out->{'Firmware Version'}
                         && $out->{'Firmware Version'} ne 'Not Applicable')
              ? $out->{'Firmware Version'} : 'N/A';
            $nexus    = join q{:}, $out->{ctrl}, $id;
            $ctrl     = $out->{ctrl};
        }

        $name     =~ s{\s+\z}{}xms; # remove trailing whitespace
        $firmware =~ s{\s+\z}{}xms; # remove trailing whitespace

        # Store enclosure data for future use. $ctrl is used rather
        # than $out->{ctrl}: the latter is only set in the omreport
        # branch above, so SNMP records would otherwise carry an
        # undefined controller.
        push @enclosures, { 'id'   => $id,
                            'ctrl' => $ctrl,
                            'name' => $name };

        # Collecting some storage info
        $sysinfo{'enclosure'}{$nexus}{'id'}       = $nexus;
        $sysinfo{'enclosure'}{$nexus}{'name'}     = $name;
        $sysinfo{'enclosure'}{$nexus}{'firmware'} = $firmware;

        next ENCLOSURE if blacklisted('encl', $nexus);

        my $msg = sprintf 'Enclosure %s [%s] on controller %d is %s',
          $nexus, $name, $ctrl, $state;
        report('storage', $msg, $status2nagios{$status}, $nexus);
    }
    return;
}
1895 | ||
1896 | ||
1897 | #----------------------------------------- | |
1898 | # STORAGE: Check enclosure fans | |
1899 | #----------------------------------------- | |
sub check_enclosure_fans {
    # Report the condition of the fans in every storage enclosure.
    #
    # Rows come from SNMP when $snmp is set; otherwise omreport is run
    # once per entry in @enclosures. Each fan that is not blacklisted
    # ('encl_fan') results in one report('storage', ...) call.
    # Takes no arguments and returns nothing.
    return if !@controllers;

    my @output = ();

    if ($snmp) {
        my %fan_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.1'  => 'fanNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.2'  => 'fanName',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.4'  => 'fanState',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.11' => 'fanProbeCurrValue',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.15' => 'fanComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.7.1.16' => 'fanNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.8.1.4'  => 'fanConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.8.1.5'  => 'fanConnectionEnclosureNumber',
            );

        my $result = $snmp_session->get_entries(-columns => [keys %fan_oid]);

        # An empty answer simply means there are no enclosure fans
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%fan_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            my $query = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=fans";
            push @output, @{ run_omreport($query) };
            map_item('ctrl',      $enc->{ctrl}, \@output);
            map_item('encl_id',   $enc->{id},   \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Numeric SNMP fanState values and their textual meaning
    my %fan_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         6  => 'Degraded',
         21 => 'Missing',
        );

    # Walk through the fans of all enclosures
    FAN:
    foreach my $out (@output) {
        my ($id, $nexus, $name, $state, $status, $speed, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $out->{fanNumber} - 1;
            $name      = $out->{fanName};
            $state     = $fan_state{$out->{fanState}};
            $status    = $snmp_status{$out->{fanComponentStatus}};
            $speed     = $out->{fanProbeCurrValue};
            $encl_id   = $out->{fanConnectionEnclosureNumber} - 1;
            $encl_name = $out->{fanConnectionEnclosureName};
            $nexus     = convert_nexus($out->{fanNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            $speed     = $out->{'Speed'};
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next FAN if blacklisted('encl_fan', $nexus);

        if ($status ne 'Ok') {
            # Anything but Ok: severity is looked up from the status
            report('storage',
                   sprintf('%s in enclosure %s [%s] needs attention: %s',
                           $name, $encl_id, $encl_name, $state),
                   $status2nagios{$status}, $nexus);
        }
        else {
            # Healthy fan: include the current speed
            report('storage',
                   sprintf('%s in enclosure %s [%s] is %s (speed=%s)',
                           $name, $encl_id, $encl_name, $state, $speed),
                   $E_OK, $nexus);
        }
    }
    return;
}
1994 | ||
1995 | ||
1996 | #----------------------------------------- | |
1997 | # STORAGE: Check enclosure power supplies | |
1998 | #----------------------------------------- | |
sub check_enclosure_pwr {
    # Report the condition of the power supplies in every storage
    # enclosure.
    #
    # Rows come from SNMP when $snmp is set; otherwise omreport is run
    # once per entry in @enclosures. Each power supply that is not
    # blacklisted ('encl_ps') results in one report('storage', ...)
    # call. Takes no arguments and returns nothing.
    return if !@controllers;

    my @output = ();

    if ($snmp) {
        my %ps_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.1'  => 'powerSupplyNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.2'  => 'powerSupplyName',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.4'  => 'powerSupplyState',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.9'  => 'powerSupplyComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.9.1.10' => 'powerSupplyNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.10.1.4' => 'powerSupplyConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.10.1.5' => 'powerSupplyConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # An empty answer simply means there are no enclosure PSUs
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%ps_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            my $query = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=pwrsupplies";
            push @output, @{ run_omreport($query) };
            map_item('ctrl',      $enc->{ctrl}, \@output);
            map_item('encl_id',   $enc->{id},   \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Numeric SNMP powerSupplyState values and their textual meaning
    my %ps_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         5  => 'Not Installed',
         6  => 'Degraded',
         11 => 'Removed',
         21 => 'Missing',
        );

    # Walk through the power supplies of all enclosures
    PS:
    foreach my $out (@output) {
        my ($id, $nexus, $name, $state, $status, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $out->{powerSupplyNumber};
            $name      = $out->{powerSupplyName};
            $state     = $ps_state{$out->{powerSupplyState}};
            $status    = $snmp_status{$out->{powerSupplyComponentStatus}};
            $encl_id   = $out->{powerSupplyConnectionEnclosureNumber} - 1;
            $encl_name = $out->{powerSupplyConnectionEnclosureName};
            $nexus     = convert_nexus($out->{powerSupplyNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next PS if blacklisted('encl_ps', $nexus);

        if ($status ne 'Ok') {
            # Anything but Ok: severity is looked up from the status
            report('storage',
                   sprintf('%s in enclosure %s [%s] needs attention: %s',
                           $name, $encl_id, $encl_name, $state),
                   $status2nagios{$status}, $nexus);
        }
        else {
            # Healthy power supply
            report('storage',
                   sprintf('%s in enclosure %s [%s] is %s',
                           $name, $encl_id, $encl_name, $state),
                   $E_OK, $nexus);
        }
    }
    return;
}
2088 | ||
2089 | ||
2090 | #----------------------------------------- | |
2091 | # STORAGE: Check enclosure temperatures | |
2092 | #----------------------------------------- | |
sub check_enclosure_temp {
    # Check the temperature probes in every storage enclosure and pass
    # one message per probe to report('storage', ...).
    #
    # Data is fetched via SNMP when $snmp is set, otherwise by running
    # omreport once per entry in @enclosures. When $opt{perfdata} is
    # defined, one entry per probe is also stored in %perfdata.
    # Takes no arguments and returns nothing.
    return if $#controllers == -1;

    my $id        = undef;
    my $nexus     = undef;
    my $name      = undef;
    my $state     = undef;
    my $status    = undef;
    my $reading   = undef;
    my $max_warn  = undef;
    my $max_crit  = undef;
    my $encl_id   = undef;
    my $encl_name = undef;
    my @output    = ();

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.1'  => 'temperatureProbeNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.2'  => 'temperatureProbeName',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.4'  => 'temperatureProbeState',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.6'  => 'temperatureProbeUnit',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.9'  => 'temperatureProbeMaxWarning',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.10' => 'temperatureProbeMaxCritical',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.11' => 'temperatureProbeCurValue',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.13' => 'temperatureProbeComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.11.1.14' => 'temperatureProbeNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.4'  => 'temperatureConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.12.1.5'  => 'temperatureConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # No enclosure temperature probes is OK
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            push @output, @{ run_omreport("storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=temps") };
            map_item('ctrl', $enc->{ctrl}, \@output);
            map_item('encl_id', $enc->{id}, \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Translation of the numeric SNMP temperatureProbeState values
    my %temp_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         4  => 'Offline',
         6  => 'Degraded',
         9  => 'Inactive',
         21 => 'Missing',
        );

    # Check temperature probes on each of the enclosures
    TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            $id        = $out->{temperatureProbeNumber} - 1;
            $name      = $out->{temperatureProbeName};
            $state     = $temp_state{$out->{temperatureProbeState}};
            $status    = $snmp_status{$out->{temperatureProbeComponentStatus}};
            $reading   = $out->{temperatureProbeCurValue};
            $max_warn  = $out->{temperatureProbeMaxWarning};
            $max_crit  = $out->{temperatureProbeMaxCritical};
            $encl_id   = $out->{temperatureConnectionEnclosureNumber} - 1;
            $encl_name = $out->{temperatureConnectionEnclosureName};
            $nexus     = convert_nexus($out->{temperatureProbeNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            # The substitutions below strip a " C" unit suffix from
            # the omreport values, leaving the bare number.
            $reading   = $out->{'Reading'};
            $reading   =~ s{\s*C}{}xms;
            $max_warn  = $out->{'Maximum Warning Threshold'};
            $max_warn  =~ s{\s*C}{}xms;
            $max_crit  = $out->{'Maximum Failure Threshold'};
            $max_crit  =~ s{\s*C}{}xms;
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next TEMP if blacklisted('encl_temp', $nexus);

        # Default
        if ($status ne 'Ok') {
            # The "C" unit belongs to the reading, not to the state
            my $msg = sprintf '%s in enclosure %s [%s] is %s at %s C (%s max)',
              $name, $encl_id, $encl_name, $state, $reading, $max_crit;
            report('storage', $msg, $status2nagios{$status}, $nexus);
        }
        # Ok
        else {
            my $msg = sprintf '%s in enclosure %s [%s]: %s C (%s max)',
              $name, $encl_id, $encl_name, $reading, $max_crit;
            report('storage', $msg, $E_OK, $nexus);
        }

        # Collect performance data
        if (defined $opt{perfdata}) {
            # Shorten "Temperature Probe N" to "temp_N" for the label
            $name    =~ s{\A Temperature\sProbe\s(\d+) \z}{temp_$1}gxms;
            my $pkey = "enclosure_${encl_id}_${name}";
            my $pval = join q{;}, "${reading}C", $max_warn, $max_crit;
            $perfdata{$pkey} = $pval;
        }
    }
    return;
}
2206 | ||
2207 | ||
2208 | #----------------------------------------- | |
2209 | # STORAGE: Check enclosure management modules (EMM) | |
2210 | #----------------------------------------- | |
sub check_enclosure_emms {
    # Report the condition of the enclosure management modules (EMMs)
    # in every storage enclosure.
    #
    # Rows come from SNMP when $snmp is set; otherwise omreport is run
    # once per entry in @enclosures. Each EMM that is not blacklisted
    # ('encl_emm') results in one report('storage', ...) call.
    # Takes no arguments and returns nothing.
    return if !@controllers;

    my @output = ();

    if ($snmp) {
        my %emms_oid
          = (
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.1'  => 'enclosureManagementModuleNumber',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.2'  => 'enclosureManagementModuleName',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.4'  => 'enclosureManagementModuleState',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.11' => 'enclosureManagementModuleComponentStatus',
             '1.3.6.1.4.1.674.10893.1.20.130.13.1.12' => 'enclosureManagementModuleNexusID',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.4'  => 'enclosureManagementModuleConnectionEnclosureName',
             '1.3.6.1.4.1.674.10893.1.20.130.14.1.5'  => 'enclosureManagementModuleConnectionEnclosureNumber',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %emms_oid]);

        # An empty answer simply means there are no enclosure EMMs
        return if !defined $result;

        @output = @{ get_snmp_output($result, \%emms_oid) };
    }
    else {
        foreach my $enc (@enclosures) {
            my $query = "storage enclosure controller=$enc->{ctrl} enclosure=$enc->{id} info=emms";
            push @output, @{ run_omreport($query) };
            map_item('ctrl',      $enc->{ctrl}, \@output);
            map_item('encl_id',   $enc->{id},   \@output);
            map_item('encl_name', $enc->{name}, \@output);
        }
    }

    # Numeric SNMP enclosureManagementModuleState values and their
    # textual meaning
    my %emms_state
      = (
         0  => 'Unknown',
         1  => 'Ready',
         2  => 'Failed',
         3  => 'Online',
         4  => 'Offline',
         5  => 'Not Installed',
         6  => 'Degraded',
         21 => 'Missing',
        );

    # Walk through the management modules of all enclosures
    EMM:
    foreach my $out (@output) {
        my ($id, $nexus, $name, $state, $status, $encl_id, $encl_name);

        if ($snmp) {
            $id        = $out->{enclosureManagementModuleNumber} - 1;
            $name      = $out->{enclosureManagementModuleName};
            $state     = $emms_state{$out->{enclosureManagementModuleState}};
            $status    = $snmp_status{$out->{enclosureManagementModuleComponentStatus}};
            $encl_id   = $out->{enclosureManagementModuleConnectionEnclosureNumber} - 1;
            $encl_name = $out->{enclosureManagementModuleConnectionEnclosureName};
            $nexus     = convert_nexus($out->{enclosureManagementModuleNexusID});
        }
        else {
            $id        = $out->{'ID'};
            $name      = $out->{'Name'};
            $state     = $out->{'State'};
            $status    = $out->{'Status'};
            $encl_id   = join q{:}, $out->{ctrl}, $out->{'encl_id'};
            $encl_name = $out->{encl_name};
            $nexus     = join q{:}, $out->{ctrl}, $out->{'encl_id'}, $id;
        }

        next EMM if blacklisted('encl_emm', $nexus);

        if ($status ne 'Ok') {
            # Anything but Ok: severity is looked up from the status
            report('storage',
                   sprintf('%s in enclosure %s [%s] needs attention: %s',
                           $name, $encl_id, $encl_name, $state),
                   $status2nagios{$status}, $nexus);
        }
        else {
            # Healthy management module
            report('storage',
                   sprintf('%s in enclosure %s [%s] is %s',
                           $name, $encl_id, $encl_name, $state),
                   $E_OK, $nexus);
        }
    }
    return;
}
2301 | ||
2302 | ||
2303 | #----------------------------------------- | |
2304 | # CHASSIS: Check memory modules | |
2305 | #----------------------------------------- | |
sub check_memory {
    # Check every populated memory module and pass one message per
    # module to report('chassis', ...).
    #
    # Data is fetched via SNMP when $snmp is set, otherwise from
    # 'omreport <chassis> memory'. Each module that is checked is
    # counted in $count{dimm}. Takes no arguments and returns nothing.
    # If the SNMP query yields no result an error is printed and the
    # plugin exits $E_UNKNOWN.
    my $index    = undef;
    my $status   = undef;
    my $location = undef;
    my $size     = undef;
    my $modes    = undef;
    my @failures = ();
    my @output   = ();

    if ($snmp) {
        my %dimm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.1100.50.1.2.1'  => 'memoryDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.5.1'  => 'memoryDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.8.1'  => 'memoryDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.14.1' => 'memoryDeviceSize',
             '1.3.6.1.4.1.674.10892.1.1100.50.1.20.1' => 'memoryDeviceFailureModes',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %dimm_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [memory]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%dimm_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis memory") };
    }

    # Note: These values are bit masks, so combination values are
    # possible. If value is 0 (zero), memory device has no faults.
    my %failure_mode
      = (
         1  => 'ECC single bit correction warning rate exceeded',
         2  => 'ECC single bit correction failure rate exceeded',
         4  => 'ECC multibit fault encountered',
         8  => 'ECC single bit correction logging disabled',
         16 => 'device disabled because of spare activation',
        );

    DIMM:
    foreach my $out (@output) {
        @failures = ();  # Initialize
        if ($snmp) {
            $index    = $out->{memoryDeviceIndex};
            $status   = $snmp_status{$out->{memoryDeviceStatus}};
            $location = $out->{memoryDeviceLocationName};
            $size     = sprintf '%d MB', $out->{memoryDeviceSize}/1024;
            $modes    = $out->{memoryDeviceFailureModes};
            if (defined $modes && $modes > 0) {
                # Sort the masks numerically so the failures are
                # listed in bit order (a plain sort gives 1, 16, 2, ...)
                foreach my $mask (sort { $a <=> $b } keys %failure_mode) {
                    if (($modes & $mask) != 0) {
                        push @failures, $failure_mode{$mask};
                    }
                }
            }
        }
        else {
            # An unoccupied slot gets no index and is skipped below
            $index    = $out->{'Type'} eq '[Not Occupied]' ? undef : $out->{'Index'};
            $status   = $out->{'Status'};
            $location = $out->{'Connector Name'};
            $size     = $out->{'Size'};
            if (defined $size) {
                $size =~ s{\s\s}{ }gxms;
            }
            # Run 'omreport chassis memory index=X' to get the failures
            if (defined $index && $status ne 'Ok') {
                foreach my $line (@{ run_command("$omreport $omopt_chassis memory index=$index -fmt ssv") }) {
                    if ($line =~ m/\A Failures; (.+?) \z/xms) {
                        # Drop the last character of the failure text,
                        # then split it into sentences on '.'
                        chop(my $fail = $1);
                        push @failures, split m{\.}xms, $fail;
                    }
                }
            }
        }

        # Ignore empty memory slots. This is tested first so that an
        # undefined index is never handed to blacklisted().
        next DIMM if !defined $index;

        # Strip leading and trailing whitespace
        $location =~ s{\A \s*(.*?)\s* \z}{$1}xms;

        next DIMM if blacklisted('dimm', $index);
        $count{dimm}++;

        if ($status ne 'Ok') {
            my $msg = undef;
            if (scalar @failures == 0) {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention (%s)',
                  $index, $location, $size, $status;
            }
            else {
                $msg = sprintf 'Memory module %d [%s, %s] needs attention: %s',
                  $index, $location, $size, (join q{, }, @failures);
            }

            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'Memory module %d [%s, %s] is %s',
              $index, $location, $size, $status;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2412 | ||
2413 | ||
2414 | #----------------------------------------- | |
2415 | # CHASSIS: Check fans | |
2416 | #----------------------------------------- | |
sub check_fans {
    # Report the condition of the chassis fans.
    #
    # Rows come from SNMP when $snmp is set, otherwise from
    # 'omreport <chassis> fans'. Each fan that is not blacklisted
    # ('fan') is counted in $count{fan}, results in one
    # report('chassis', ...) call and, when $opt{perfdata} is defined,
    # one %perfdata entry. Takes no arguments. Returns 0 when the SNMP
    # query gives no result on a blade, nothing otherwise; a failed
    # SNMP query on a non-blade prints an error and exits $E_UNKNOWN.
    my @output = ();

    if ($snmp) {
        my %cool_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.12.1.2.1'  => 'coolingDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.700.12.1.5.1'  => 'coolingDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.700.12.1.6.1'  => 'coolingDeviceReading',
             '1.3.6.1.4.1.674.10892.1.700.12.1.8.1'  => 'coolingDeviceLocationName',
             '1.3.6.1.4.1.674.10892.1.700.12.1.10.1' => 'coolingDeviceUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.12.1.11.1' => 'coolingDeviceUpperNonCriticalThreshold',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %cool_oid]);

        if (!defined $result) {
            # On a blade an empty answer is not treated as an error
            return 0 if $blade;

            printf "SNMP ERROR [cooling]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%cool_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis fans") };
    }

    FAN:
    foreach my $out (@output) {
        my ($index, $status, $reading, $location, $max_crit, $max_warn);

        if ($snmp) {
            $index    = $out->{coolingDeviceIndex};
            $status   = $snmp_probestatus{$out->{coolingDeviceStatus}};
            $reading  = $out->{coolingDeviceReading};
            $location = $out->{coolingDeviceLocationName};

            # Thresholds that are not part of the row default to 0
            $max_crit = 0;
            if (exists $out->{coolingDeviceUpperCriticalThreshold}) {
                $max_crit = $out->{coolingDeviceUpperCriticalThreshold};
            }
            $max_warn = 0;
            if (exists $out->{coolingDeviceUpperNonCriticalThreshold}) {
                $max_warn = $out->{coolingDeviceUpperNonCriticalThreshold};
            }
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};

            # Thresholds shown as '[N/A]' default to 0
            if ($out->{'Maximum Failure Threshold'} ne '[N/A]') {
                $max_crit = $out->{'Maximum Failure Threshold'};
            }
            else {
                $max_crit = 0;
            }
            if ($out->{'Maximum Warning Threshold'} ne '[N/A]') {
                $max_warn = $out->{'Maximum Warning Threshold'};
            }
            else {
                $max_warn = 0;
            }

            # Keep only the leading digits of each value
            $reading  =~ s{\A (\d+).* \z}{$1}xms;
            $max_warn =~ s{\A (\d+).* \z}{$1}xms;
            $max_crit =~ s{\A (\d+).* \z}{$1}xms;
        }

        next FAN if blacklisted('fan', $index);
        $count{fan}++;

        if ($status ne 'Ok') {
            my $msg = sprintf 'Chassis fan %d [%s] needs attention: %s',
              $index, $location, $status;

            # SNMP probe statuses use their own severity table
            my $err;
            if ($snmp) {
                $err = $probestatus2nagios{$status};
            }
            else {
                $err = $status2nagios{$status};
            }
            report('chassis', $msg, $err, $index);
        }
        else {
            my $msg = sprintf 'Chassis fan %d [%s]: %s',
              $index, $location, $reading;
            report('chassis', $msg, $E_OK, $index);
        }

        # Collect performance data
        if (defined $opt{perfdata}) {
            # Build the label from the lower-cased location name
            my $pname = lc $location;
            $pname =~ s{\s}{_}gxms;
            $pname =~ s{proc_}{cpu#}xms;
            my $pkey = join q{_}, 'fan', $index, $pname;
            $perfdata{$pkey} = join q{;}, "${reading}RPM", $max_warn, $max_crit;
        }
    }
    return;
}
2506 | ||
2507 | ||
#-----------------------------------------
# CHASSIS: Check power supplies
#-----------------------------------------
# Reports every power supply through report('chassis', ...).  Data is
# fetched via SNMP when $snmp is true, otherwise via
# run_omreport("$omopt_chassis pwrsupplies").  Units blacklisted as
# 'ps' are skipped; every remaining unit increments $count{power}.
#
# Returns 0 when the SNMP query yields no result (no instrumented PSU
# is not treated as an error), otherwise returns nothing.
sub check_powersupplies {
    my $index    = undef;
    my $status   = undef;
    my $type     = undef;
    my $err_type = undef;
    my $state    = undef;
    my @states   = ();
    my @output   = ();

    if ($snmp) {
        my %ps_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.12.1.2.1'  => 'powerSupplyIndex',
             '1.3.6.1.4.1.674.10892.1.600.12.1.5.1'  => 'powerSupplyStatus',
             '1.3.6.1.4.1.674.10892.1.600.12.1.7.1'  => 'powerSupplyType',
             '1.3.6.1.4.1.674.10892.1.600.12.1.11.1' => 'powerSupplySensorState',
             '1.3.6.1.4.1.674.10892.1.600.12.1.12.1' => 'powerSupplyConfigurationErrorType',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %ps_oid]);

        # No instrumented PSU is OK (blades, low-end servers)
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%ps_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis pwrsupplies") };
    }

    my %ps_type
      = (
         1  => 'Other',
         2  => 'Unknown',
         3  => 'Linear',
         4  => 'Switching',
         5  => 'Battery',
         6  => 'Uninterruptible Power Supply',
         7  => 'Converter',
         8  => 'Regulator',
         9  => 'AC',
         10 => 'DC',
         11 => 'VRM',
        );

    # Sensor state is a bit field; each key is one bit mask
    my %ps_state
      = (
         1  => 'Presence detected',
         2  => 'Failure detected',
         4  => 'Predictive Failure',
         8  => 'AC lost',
         16 => 'AC lost or out-of-range',
         32 => 'AC out-of-range but present',
         64 => 'Configuration error',
        );

    my %ps_config_error_type
      = (
         1 => 'Vendor mismatch',
         2 => 'Revision mismatch',
         3 => 'Processor missing',
        );

    PS:
    foreach my $out (@output) {
        if ($snmp) {
            @states = ();  # contains states for the PS

            $index  = $out->{powerSupplyIndex} - 1;
            $status = $snmp_status{$out->{powerSupplyStatus}};

            # A missing or unmapped type code would leave $type undefined
            # and trigger a warning when the message is formatted
            $type = (defined $out->{powerSupplyType}
                     and exists $ps_type{$out->{powerSupplyType}})
              ? $ps_type{$out->{powerSupplyType}} : 'Unknown';

            $err_type = defined $out->{powerSupplyConfigurationErrorType}
              ? $ps_config_error_type{$out->{powerSupplyConfigurationErrorType}} : undef;

            # Decode the combined state from the sensor state bit field.
            # The masks are sorted numerically (a plain sort would give
            # 1, 16, 2, 32, ...) and the value is forced to a number so
            # that "&" can never fall back to a string-wise AND.
            if (defined $out->{powerSupplySensorState}) {
                my $sensor_bits = int($out->{powerSupplySensorState});
                foreach my $mask (sort { $a <=> $b } keys %ps_state) {
                    if (($sensor_bits & $mask) != 0) {
                        push @states, $ps_state{$mask};
                    }
                }
            }

            # If configuration error, also include the error type
            if (defined $err_type) {
                push @states, $err_type;
            }

            # Finally, construct the state string
            $state = join q{, }, @states;
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $type   = $out->{'Type'};
            $state  = $out->{'Online Status'};
        }

        next PS if blacklisted('ps', $index);
        $count{power}++;

        if ($status ne 'Ok') {
            my $msg = sprintf 'Power Supply %d [%s] needs attention: %s',
              $index, $type, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        else {
            my $msg = sprintf 'Power Supply %d [%s]: %s',
              $index, $type, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
2622 | ||
2623 | ||
#-----------------------------------------
# CHASSIS: Check temperatures
#-----------------------------------------
# Reports every temperature probe through report('chassis', ...).
# Data is fetched via SNMP when $snmp is true, otherwise via
# run_omreport("$omopt_chassis temps").  Probes blacklisted as 'temp'
# are skipped; every remaining probe increments $count{temp}.
#
# Probes of type 'Discrete' are reported with their discrete reading.
# For all other probes the custom thresholds returned by
# custom_temperature_thresholds() are evaluated first, then the
# probe's own thresholds if its status is not 'Ok'.  When
# $opt{perfdata} is defined, an entry is stored in %perfdata.
#
# Prints an error and exits with $E_UNKNOWN if the SNMP table cannot
# be fetched.  Returns nothing.
sub check_temperatures {
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my $max_crit = undef;
    my $max_warn = undef;
    my $min_warn = undef;
    my $min_crit = undef;
    my $type     = undef;
    my $discrete = undef;
    my @output   = ();

    # Getting custom temperature thresholds (user option)
    my %warn_threshold = %{ custom_temperature_thresholds('w') };
    my %crit_threshold = %{ custom_temperature_thresholds('c') };

    if ($snmp) {
        my %temp_oid
          = (
             '1.3.6.1.4.1.674.10892.1.700.20.1.2.1'  => 'temperatureProbeIndex',
             '1.3.6.1.4.1.674.10892.1.700.20.1.5.1'  => 'temperatureProbeStatus',
             '1.3.6.1.4.1.674.10892.1.700.20.1.6.1'  => 'temperatureProbeReading',
             '1.3.6.1.4.1.674.10892.1.700.20.1.7.1'  => 'temperatureProbeType',
             '1.3.6.1.4.1.674.10892.1.700.20.1.8.1'  => 'temperatureProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.700.20.1.10.1' => 'temperatureProbeUpperCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.11.1' => 'temperatureProbeUpperNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.12.1' => 'temperatureProbeLowerNonCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.13.1' => 'temperatureProbeLowerCriticalThreshold',
             '1.3.6.1.4.1.674.10892.1.700.20.1.16.1' => 'temperatureProbeDiscreteReading',
            );
        # this didn't work well for some reason
        #my $result = $snmp_session->get_entries(-columns => [keys %temp_oid]);

        # Getting values using the table
        my $temperatureProbeTable = '1.3.6.1.4.1.674.10892.1.700.20';
        my $result = $snmp_session->get_table(-baseoid => $temperatureProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [temperatures]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%temp_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis temps") };
    }

    my %probe_type
      = (
         1  => 'Other',      # type is other than following values
         2  => 'Unknown',    # type is unknown
         3  => 'AmbientESM', # type is Ambient Embedded Systems Management temperature probe
         16 => 'Discrete',   # type is temperature probe with discrete reading
        );

    TEMP:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{temperatureProbeIndex} - 1;
            $status   = $snmp_probestatus{$out->{temperatureProbeStatus}};
            $location = $out->{temperatureProbeLocationName};

            # SNMP values are divided by 10 before use.  Any value the
            # agent did not return becomes '[N/A]', the same marker the
            # threshold checks below already test for, instead of being
            # silently treated as 0 (with an uninitialized warning).
            $reading  = exists $out->{temperatureProbeReading}
              ? $out->{temperatureProbeReading} / 10 : '[N/A]';
            $max_crit = exists $out->{temperatureProbeUpperCriticalThreshold}
              ? $out->{temperatureProbeUpperCriticalThreshold} / 10 : '[N/A]';
            $max_warn = exists $out->{temperatureProbeUpperNonCriticalThreshold}
              ? $out->{temperatureProbeUpperNonCriticalThreshold} / 10 : '[N/A]';
            $min_crit = exists $out->{temperatureProbeLowerCriticalThreshold}
              ? $out->{temperatureProbeLowerCriticalThreshold} / 10 : '[N/A]';
            $min_warn = exists $out->{temperatureProbeLowerNonCriticalThreshold}
              ? $out->{temperatureProbeLowerNonCriticalThreshold} / 10 : '[N/A]';

            # An unmapped type code is handled like any non-discrete probe
            $type     = (defined $out->{temperatureProbeType}
                         and exists $probe_type{$out->{temperatureProbeType}})
              ? $probe_type{$out->{temperatureProbeType}} : 'Unknown';
            $discrete = exists $out->{temperatureProbeDiscreteReading}
              ? $out->{temperatureProbeDiscreteReading} : undef;
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'}; $reading =~ s{\.0\s+C}{}xms;
            $location = $out->{'Probe Name'};
            $max_crit = $out->{'Maximum Failure Threshold'}; $max_crit =~ s{\.0\s+C}{}xms;
            $max_warn = $out->{'Maximum Warning Threshold'}; $max_warn =~ s{\.0\s+C}{}xms;
            $min_crit = $out->{'Minimum Failure Threshold'}; $min_crit =~ s{\.0\s+C}{}xms;
            $min_warn = $out->{'Minimum Warning Threshold'}; $min_warn =~ s{\.0\s+C}{}xms;

            # A reading that is not a plain integer after stripping the
            # unit is treated as a discrete reading
            $type     = $reading =~ m{\A\d+\z}xms ? 'AmbientESM' : 'Discrete';
            $discrete = $reading;
        }

        next TEMP if blacklisted('temp', $index);
        $count{temp}++;

        if ($type eq 'Discrete') {
            my $msg = sprintf 'Temperature probe %d (%s): is %s',
              $index, $location, $discrete;
            my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
            report('chassis', $msg, $err, $index);
        }
        else {
            # First check according to custom thresholds
            if (exists $crit_threshold{$index}{max} and $reading > $crit_threshold{$index}{max}) {
                # Custom critical MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{max};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{max} and $reading > $warn_threshold{$index}{max}) {
                # Custom warning MAX
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom max=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{max};
                report('chassis', $msg, $E_WARNING, $index);
            }
            elsif (exists $crit_threshold{$index}{min} and $reading < $crit_threshold{$index}{min}) {
                # Custom critical MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $crit_threshold{$index}{min};
                report('chassis', $msg, $E_CRITICAL, $index);
            }
            elsif (exists $warn_threshold{$index}{min} and $reading < $warn_threshold{$index}{min}) {
                # Custom warning MIN
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C (custom min=%d)',
                  $index, $location, $reading, $warn_threshold{$index}{min};
                report('chassis', $msg, $E_WARNING, $index);
            }
            # Then the probe's own thresholds, only if it reports a problem
            elsif ($status ne 'Ok' and $max_crit ne '[N/A]' and $reading > $max_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $max_warn ne '[N/A]' and $reading > $max_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too high at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_crit ne '[N/A]' and $reading < $min_crit) {
                my $msg = sprintf 'Temperature Probe %d [%s] is critically low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            elsif ($status ne 'Ok' and $min_warn ne '[N/A]' and $reading < $min_warn) {
                my $msg = sprintf 'Temperature Probe %d [%s] is too low at %d C',
                  $index, $location, $reading;
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }
            # Ok
            else {
                my $msg = sprintf 'Temperature Probe %d [%s] reads %d C',
                  $index, $location, $reading;
                if ($min_warn eq '[N/A]' and $min_crit eq '[N/A]') {
                    $msg .= sprintf ' (max=%s/%s)', $max_warn, $max_crit;
                }
                else {
                    $msg .= sprintf ' (min=%s/%s, max=%s/%s)',
                      $min_warn, $min_crit, $max_warn, $max_crit;
                }
                my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
                report('chassis', $msg, $err, $index);
            }

            # Collect performance data
            if (defined $opt{perfdata}) {
                my $pname = lc $location;
                $pname =~ s{\s}{_}gxms;
                $pname =~ s{_temp\z}{}xms;
                $pname =~ s{proc_}{cpu#}xms;

                # A threshold that is not available is left empty rather
                # than written out as the literal '[N/A]'
                my $perf_warn = $max_warn eq '[N/A]' ? q{} : $max_warn;
                my $perf_crit = $max_crit eq '[N/A]' ? q{} : $max_crit;

                my $pkey = join q{_}, 'temp', $index, $pname;
                my $pval = join q{;}, "${reading}C", $perf_warn, $perf_crit;
                $perfdata{$pkey} = $pval;
            }
        }
    }
    return;
}
2803 | ||
2804 | ||
#-----------------------------------------
# CHASSIS: Check processors
#-----------------------------------------
# Reports every occupied processor slot through report('chassis', ...).
# Data is fetched via SNMP when $snmp is true, otherwise via
# run_omreport("$omopt_chassis processors").  Processors blacklisted
# as 'cpu' and unoccupied slots are skipped; every reported processor
# increments $count{cpu}.
#
# The name shown in the message is the brand string when available,
# otherwise it is built from manufacturer, family and speed, and
# falls back to "unknown".
#
# Prints an error and exits with $E_UNKNOWN if the SNMP query fails.
# Returns nothing.
sub check_processors {
    my $index   = undef;
    my $status  = undef;
    my $state   = undef;
    my $brand   = undef;
    my $family  = undef;
    my $man     = undef;
    my $speed   = undef;
    my @output  = ();

    if ($snmp) {

        # NOTE: For some reason, older models don't have the
        # "Processor Device Status" OIDs. We check both the newer
        # (preferred) OIDs and the old ones.

        my %cpu_oid
          = (
             '1.3.6.1.4.1.674.10892.1.1100.30.1.2.1'  => 'processorDeviceIndex',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.5.1'  => 'processorDeviceStatus',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.8.1'  => 'processorDeviceManufacturerName',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.9.1'  => 'processorDeviceStatusState',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.10.1' => 'processorDeviceFamily',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.12.1' => 'processorDeviceCurrentSpeed',
             '1.3.6.1.4.1.674.10892.1.1100.30.1.23.1' => 'processorDeviceBrandName',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.2.1'  => 'processorDeviceStatusIndex',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.5.1'  => 'processorDeviceStatusStatus',
             '1.3.6.1.4.1.674.10892.1.1100.32.1.6.1'  => 'processorDeviceStatusReading',
            );

        my $result = $snmp_session->get_entries(-columns => [keys %cpu_oid]);

        if (!defined $result) {
            printf "SNMP ERROR [processors]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%cpu_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis processors") };
    }

    my %cpu_state
      = (
         1 => 'Other',         # other than following values
         2 => 'Unknown',       # unknown
         3 => 'Enabled',       # enabled
         4 => 'User Disabled', # disabled by user via BIOS setup
         5 => 'BIOS Disabled', # disabled by BIOS (POST error)
         6 => 'Idle',          # idle
        );

    # Status reading is a bit field; each key is one bit mask
    my %cpu_reading
      = (
         1    => 'Internal Error',      # Internal Error
         2    => 'Thermal Trip',        # Thermal Trip
         32   => 'Configuration Error', # Configuration Error
         128  => 'Present',             # Processor Present
         256  => 'Disabled',            # Processor Disabled
         512  => 'Terminator Present',  # Terminator Present
         1024 => 'Throttled',           # Processor Throttled
        );

    # Mapping between family numbers from SNMP and actual CPU family
    my %cpu_family
      = (
         1   => 'Other',                  2   => 'Unknown',               3   => '8086',
         4   => '80286',                  5   => '386',                   6   => '486',
         7   => '8087',                   8   => '80287',                 9   => '80387',
         10  => '80487',                  11  => 'Pentium',               12  => 'Pentium Pro',
         13  => 'Pentium II',             14  => 'Pentium with MMX',      15  => 'Celeron',
         16  => 'Pentium II Xeon',        17  => 'Pentium III',           18  => 'Pentium III Xeon',
         19  => 'Pentium III',            20  => 'Itanium',               21  => 'Xeon',
         22  => 'Pentium 4',              23  => 'Xeon MP',               24  => 'Itanium 2',
         25  => 'K5',                     26  => 'K6',                    27  => 'K6-2',
         28  => 'K6-3',                   29  => 'Athlon',                30  => 'AMD2900',
         31  => 'K6-2+',                  32  => 'Power PC',              33  => 'Power PC 601',
         34  => 'Power PC 603',           35  => 'Power PC 603+',         36  => 'Power PC 604',
         37  => 'Power PC 620',           38  => 'Power PC x704',         39  => 'Power PC 750',
         48  => 'Alpha',                  49  => 'Alpha 21064',           50  => 'Alpha 21066',
         51  => 'Alpha 21164',            52  => 'Alpha 21164PC',         53  => 'Alpha 21164a',
         54  => 'Alpha 21264',            55  => 'Alpha 21364',           64  => 'MIPS',
         65  => 'MIPS R4000',             66  => 'MIPS R4200',            67  => 'MIPS R4400',
         68  => 'MIPS R4600',             69  => 'MIPS R10000',           80  => 'SPARC',
         81  => 'SuperSPARC',             82  => 'microSPARC II',         83  => 'microSPARC IIep',
         84  => 'UltraSPARC',             85  => 'UltraSPARC II',         86  => 'UltraSPARC IIi',
         87  => 'UltraSPARC III',         88  => 'UltraSPARC IIIi',       96  => '68040',
         97  => '68xxx',                  98  => '68000',                 99  => '68010',
         100 => '68020',                  101 => '68030',                 112 => 'Hobbit',
         120 => 'Crusoe TM5000',          121 => 'Crusoe TM3000',         122 => 'Efficeon TM8000',
         128 => 'Weitek',                 131 => 'Athlon 64',             132 => 'Opteron',
         133 => 'Sempron',                134 => 'Turion 64 Mobile',      135 => 'Dual-Core Opteron',
         136 => 'Athlon 64 X2 DC',        137 => 'Turion 64 X2 M',        138 => 'Quad-Core Opteron',
         139 => '3rd gen Opteron',        144 => 'PA-RISC',               145 => 'PA-RISC 8500',
         146 => 'PA-RISC 8000',           147 => 'PA-RISC 7300LC',        148 => 'PA-RISC 7200',
         149 => 'PA-RISC 7100LC',         150 => 'PA-RISC 7100',          160 => 'V30',
         171 => 'Dual-Core Xeon 5200',    172 => 'Dual-Core Xeon 7200',   173 => 'Quad-Core Xeon 7300',
         174 => 'Quad-Core Xeon 7400',    175 => 'Multi-Core Xeon 7400',  176 => 'M1',
         177 => 'M2',                     180 => 'AS400',                 182 => 'Athlon XP',
         183 => 'Athlon MP',              184 => 'Duron',                 185 => 'Pentium M',
         186 => 'Celeron D',              187 => 'Pentium D',             188 => 'Pentium Extreme',
         189 => 'Core Solo',              190 => 'Core2',                 191 => 'Core2 Duo',
         198 => 'Core i7',                199 => 'Dual-Core Celeron',     200 => 'IBM390',
         201 => 'G4',                     202 => 'G5',                    203 => 'ESA/390 G6',
         204 => 'z/Architecture',         210 => 'C7-M',                  211 => 'C7-D',
         212 => 'C7',                     213 => 'Eden',                  214 => 'Multi-Core Xeon',
         215 => 'Dual-Core Xeon 3xxx',    216 => 'Quad-Core Xeon 3xxx',   218 => 'Dual-Core Xeon 5xxx',
         219 => 'Quad-Core Xeon 5xxx',    221 => 'Dual-Core Xeon 7xxx',   222 => 'Quad-Core Xeon 7xxx',
         223 => 'Multi-Core Xeon 7xxx',   250 => 'i860',                  251 => 'i960',
        );

    CPU:
    foreach my $out (@output) {
        if ($snmp) {
            # Prefer the "Processor Device Status" values when present
            $index = exists $out->{processorDeviceStatusIndex}
              ? $out->{processorDeviceStatusIndex} - 1
              : $out->{processorDeviceIndex} - 1;
            $status = exists $out->{processorDeviceStatusStatus}
              ? $snmp_status{$out->{processorDeviceStatusStatus}}
              : $snmp_status{$out->{processorDeviceStatus}};
            if (exists $out->{processorDeviceStatusReading}) {
                my @states = ();  # contains states for the CPU

                # Decode the combined state from the StatusReading bit
                # field.  The masks are sorted numerically (a plain sort
                # would give 1, 1024, 128, 2, ...) and the value is
                # forced to a number so that "&" can never fall back to
                # a string-wise AND.
                my $reading_bits = int($out->{processorDeviceStatusReading});
                foreach my $mask (sort { $a <=> $b } keys %cpu_reading) {
                    if (($reading_bits & $mask) != 0) {
                        push @states, $cpu_reading{$mask};
                    }
                }

                # Finally, create the state string
                $state = join q{, }, @states;
            }
            else {
                # A missing or unmapped state code would leave $state
                # undefined and trigger a warning in sprintf below
                $state = (defined $out->{processorDeviceStatusState}
                          and exists $cpu_state{$out->{processorDeviceStatusState}})
                  ? $cpu_state{$out->{processorDeviceStatusState}} : 'Unknown';
            }
            $man    = $out->{processorDeviceManufacturerName};
            $family = (exists $out->{processorDeviceFamily}
                       and exists $cpu_family{$out->{processorDeviceFamily}})
              ? $cpu_family{$out->{processorDeviceFamily}} : undef;
            $speed  = $out->{processorDeviceCurrentSpeed};
            $brand  = $out->{processorDeviceBrandName};
        }
        else {
            $index  = $out->{'Index'};
            $status = $out->{'Status'};
            $state  = $out->{'State'};
            $brand  = exists $out->{'Processor Brand'} ? $out->{'Processor Brand'} : undef;
            $family = exists $out->{'Processor Family'} ? $out->{'Processor Family'} : undef;
            $man    = exists $out->{'Processor Manufacturer'} ? $out->{'Processor Manufacturer'} : undef;
            $speed  = exists $out->{'Current Speed'} ? $out->{'Current Speed'} : undef;
        }

        next CPU if blacklisted('cpu', $index);

        # Ignore unoccupied CPU slots (omreport)
        next CPU if (defined $out->{'Processor Manufacturer'}
                     and $out->{'Processor Manufacturer'} eq '[Not Occupied]')
          or (defined $out->{'Processor Brand'} and $out->{'Processor Brand'} eq '[Not Occupied]');

        # Ignore unoccupied CPU slots (snmp)
        if ($snmp and exists $out->{processorDeviceStatusReading}
            and $out->{processorDeviceStatusReading} == 0) {
            next CPU;
        }

        $count{cpu}++;

        if (defined $brand) {
            # Tidy up the brand string for display
            $brand =~ s{\s\s+}{ }gxms;
            $brand =~ s{\((R|tm)\)}{}gxms;
            $brand =~ s{\s(CPU|Processor)}{}xms;
            $brand =~ s{\s\@}{}xms;
        }
        elsif (defined $family and defined $man and defined $speed) {
            # Keep only the leading digits of the speed, then show it
            # divided by 1000 with a GHz suffix
            $speed =~ s{\A (\d+) .*}{$1}xms;
            $brand = sprintf '%s %s %.2fGHz', $man, $family, $speed / 1000;
        }
        else {
            $brand = "unknown";
        }

        # Default
        if ($status ne 'Ok') {
            my $msg = sprintf 'Processor %d [%s] needs attention: %s',
              $index, $brand, $state;
            report('chassis', $msg, $status2nagios{$status}, $index);
        }
        # Ok
        else {
            my $msg = sprintf 'Processor %d [%s] is %s',
              $index, $brand, $state;
            report('chassis', $msg, $E_OK, $index);
        }
    }
    return;
}
3007 | ||
3008 | ||
#-----------------------------------------
# CHASSIS: Check voltage probes
#-----------------------------------------
# Reports every voltage probe through report('chassis', ...).  Data is
# fetched via SNMP when $snmp is true, otherwise via
# run_omreport("$omopt_chassis volts").  Probes blacklisted as 'volt'
# are skipped; every remaining probe increments $count{volt}.
#
# Prints an error and exits with $E_UNKNOWN if the SNMP table cannot
# be fetched.  Returns nothing.
sub check_volts {
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my @output   = ();

    if ($snmp) {
        my %volt_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.20.1.2.1'  => 'voltageProbeIndex',
             '1.3.6.1.4.1.674.10892.1.600.20.1.5.1'  => 'voltageProbeStatus',
             '1.3.6.1.4.1.674.10892.1.600.20.1.6.1'  => 'voltageProbeReading',
             '1.3.6.1.4.1.674.10892.1.600.20.1.8.1'  => 'voltageProbeLocationName',
             '1.3.6.1.4.1.674.10892.1.600.20.1.16.1' => 'voltageProbeDiscreteReading',
            );

        my $voltageProbeTable = '1.3.6.1.4.1.674.10892.1.600.20.1';
        my $result = $snmp_session->get_table(-baseoid => $voltageProbeTable);

        if (!defined $result) {
            printf "SNMP ERROR [voltage]: %s.\n", $snmp_session->error;
            $snmp_session->close;
            exit $E_UNKNOWN;
        }

        @output = @{ get_snmp_output($result, \%volt_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis volts") };
    }

    my %volt_discrete_reading
      = (
         1 => 'Good',
         2 => 'Bad',
        );

    VOLT:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{voltageProbeIndex} - 1;

            # The exit code below is looked up in %probestatus2nagios,
            # so the status name must come from %snmp_probestatus.
            # NOTE(review): assumes voltageProbeStatus uses the probe
            # status codes, as the temperature probes do - confirm
            # against the MIB.
            $status   = $snmp_probestatus{$out->{voltageProbeStatus}};

            # The reading is divided by 1000 and shown with a "V"
            # suffix; without one, the discrete reading is used
            $reading  = exists $out->{voltageProbeReading}
              ? sprintf('%.3f V', $out->{voltageProbeReading}/1000)
              : $volt_discrete_reading{$out->{voltageProbeDiscreteReading}};
            $location = $out->{voltageProbeLocationName};
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
        }

        next VOLT if blacklisted('volt', $index);
        $count{volt}++;

        my $msg = sprintf 'Voltage sensor %d [%s] is %s',
          $index, $location, $reading;
        my $err = $snmp ? $probestatus2nagios{$status} : $status2nagios{$status};
        report('chassis', $msg, $err, $index);
    }
    return;
}
3077 | ||
3078 | ||
#-----------------------------------------
# CHASSIS: Check batteries
#-----------------------------------------
# Reports every battery probe through report('chassis', ...).  Data is
# fetched via SNMP when $snmp is true, otherwise via
# run_omreport("$omopt_chassis batteries").  Probes blacklisted as 'bp'
# are skipped; every remaining probe increments $count{bat}.
#
# Returns 0 when the SNMP query yields no result (no batteries is not
# treated as an error), otherwise returns nothing.
sub check_batteries {
    my $index    = undef;
    my $status   = undef;
    my $reading  = undef;
    my $location = undef;
    my @output   = ();

    if ($snmp) {
        my %bat_oid
          = (
             '1.3.6.1.4.1.674.10892.1.600.50.1.2.1' => 'batteryIndex',
             '1.3.6.1.4.1.674.10892.1.600.50.1.5.1' => 'batteryStatus',
             '1.3.6.1.4.1.674.10892.1.600.50.1.6.1' => 'batteryReading',
             '1.3.6.1.4.1.674.10892.1.600.50.1.7.1' => 'batteryLocationName',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %bat_oid]);

        # No batteries is OK
        return 0 if !defined $result;

        @output = @{ get_snmp_output($result, \%bat_oid) };
    }
    else {
        @output = @{ run_omreport("$omopt_chassis batteries") };
    }

    # NOTE(review): the keys are powers of two, so the reading is
    # treated as a bit field whose values may be combined - confirm
    # against the MIB definition of batteryReading.
    my %bat_reading
      = (
         1 => 'Predictive Failure',
         2 => 'Failed',
         4 => 'Presence Detected',
        );

    BATTERY:
    foreach my $out (@output) {
        if ($snmp) {
            $index    = $out->{batteryIndex} - 1;
            $status   = $snmp_status{$out->{batteryStatus}};
            $location = $out->{batteryLocationName};

            # Decode every bit that is set instead of using the raw
            # value as a hash key, which only works when a single bit
            # is set.  A single-bit reading gives the same text as a
            # direct lookup would.
            my @states = ();
            if (defined $out->{batteryReading}) {
                my $reading_bits = int($out->{batteryReading});
                foreach my $mask (sort { $a <=> $b } keys %bat_reading) {
                    if (($reading_bits & $mask) != 0) {
                        push @states, $bat_reading{$mask};
                    }
                }
            }

            # No known bit set (or no reading at all)
            $reading = @states ? join(q{, }, @states) : 'Unknown';
        }
        else {
            $index    = $out->{'Index'};
            $status   = $out->{'Status'};
            $reading  = $out->{'Reading'};
            $location = $out->{'Probe Name'};
        }

        next BATTERY if blacklisted('bp', $index);
        $count{bat}++;

        my $msg = sprintf 'Battery probe %d [%s] is %s',
          $index, $location, $reading;
        report('chassis', $msg, $status2nagios{$status}, $index);
    }
    return;
}
3139 | ||
3140 | ||
3141 | #----------------------------------------- | |
3142 | # CHASSIS: Check amperage probes (power monitoring) | |
3143 | #----------------------------------------- | |
3144 | sub check_pwrmonitoring { | |
3145 | my $index = undef; | |
3146 | my $status = undef; | |
3147 | my $reading = undef; | |
3148 | my $location = undef; | |
3149 | my $max_crit = undef; | |
3150 | my $max_warn = undef; | |
3151 | my $unit = undef; | |
3152 | my @output = (); | |
3153 | ||
3154 | if ($snmp) { | |
3155 | my %amp_oid | |
3156 | = ( | |
3157 | '1.3.6.1.4.1.674.10892.1.600.30.1.2.1' => 'amperageProbeIndex', | |
3158 | '1.3.6.1.4.1.674.10892.1.600.30.1.5.1' => 'amperageProbeStatus', | |
3159 | '1.3.6.1.4.1.674.10892.1.600.30.1.6.1' => 'amperageProbeReading', | |
3160 | '1.3.6.1.4.1.674.10892.1.600.30.1.7.1' => 'amperageProbeType', | |
3161 | '1.3.6.1.4.1.674.10892.1.600.30.1.8.1' => 'amperageProbeLocationName', | |
3162 | '1.3.6.1.4.1.674.10892.1.600.30.1.10.1' => 'amperageProbeUpperCriticalThreshold', | |
3163 | '1.3.6.1.4.1.674.10892.1.600.30.1.11.1' => 'amperageProbeUpperNonCriticalThreshold', | |
3164 | '1.3.6.1.4.1.674.10892.1.600.30.1.16.1' => 'amperageProbeDiscreteReading', | |
3165 | ); | |
3166 | my $result = $snmp_session->get_entries(-columns => [keys %amp_oid]); | |
3167 | ||
3168 | # No pwrmonitoring is OK | |
3169 | return 0 if !defined $result; | |
3170 | ||
3171 | @output = @{ get_snmp_output($result, \%amp_oid) }; | |
3172 | } | |
3173 | else { | |
3174 | @output = @{ run_omreport("$omopt_chassis pwrmonitoring") }; | |
3175 | } | |
3176 | ||
3177 | my %amp_type # Amperage probe types | |
3178 | = ( | |
3179 | 1 => 'amperageProbeTypeIsOther', # other than following values | |
3180 | 2 => 'amperageProbeTypeIsUnknown', # unknown | |
3181 | 3 => 'amperageProbeTypeIs1Point5Volt', # 1.5 amperage probe | |
3182 | 4 => 'amperageProbeTypeIs3Point3volt', # 3.3 amperage probe | |
3183 | 5 => 'amperageProbeTypeIs5Volt', # 5 amperage probe | |
3184 | 6 => 'amperageProbeTypeIsMinus5Volt', # -5 amperage probe | |
3185 | 7 => 'amperageProbeTypeIs12Volt', # 12 amperage probe | |
3186 | 8 => 'amperageProbeTypeIsMinus12Volt', # -12 amperage probe | |
3187 | 9 => 'amperageProbeTypeIsIO', # I/O probe | |
3188 | 10 => 'amperageProbeTypeIsCore', # Core probe | |
3189 | 11 => 'amperageProbeTypeIsFLEA', # FLEA (standby) probe | |
3190 | 12 => 'amperageProbeTypeIsBattery', # Battery probe | |
3191 | 13 => 'amperageProbeTypeIsTerminator', # SCSI Termination probe | |
3192 | 14 => 'amperageProbeTypeIs2Point5Volt', # 2.5 amperage probe | |
3193 | 15 => 'amperageProbeTypeIsGTL', # GTL (ground termination logic) probe | |
3194 | 16 => 'amperageProbeTypeIsDiscrete', # amperage probe with discrete reading | |
3195 | 23 => 'amperageProbeTypeIsPowerSupplyAmps', # Power Supply probe with reading in Amps | |
3196 | 24 => 'amperageProbeTypeIsPowerSupplyWatts', # Power Supply probe with reading in Watts | |
3197 | 25 => 'amperageProbeTypeIsSystemAmps', # System probe with reading in Amps | |
3198 | 26 => 'amperageProbeTypeIsSystemWatts', # System probe with reading in Watts | |
3199 | ); | |
3200 | ||
3201 | my %amp_discrete | |
3202 | = ( | |
3203 | 1 => 'Good', | |
3204 | 2 => 'Bad', | |
3205 | ); | |
3206 | ||
3207 | my %amp_unit | |
3208 | = ( | |
3209 | 'amperageProbeTypeIsPowerSupplyAmps' => 'hA', # tenths of Amps | |
3210 | 'amperageProbeTypeIsSystemAmps' => 'hA', # tenths of Amps | |
3211 | 'amperageProbeTypeIsPowerSupplyWatts' => 'W', # Watts | |
3212 | 'amperageProbeTypeIsSystemWatts' => 'W', # Watts | |
3213 | 'amperageProbeTypeIsDiscrete' => q{}, # discrete reading, no unit | |
3214 | ); | |
3215 | ||
3216 | AMP: | |
3217 | foreach my $out (@output) { | |
3218 | if ($snmp) { | |
3219 | $index = $out->{amperageProbeIndex} - 1; | |
3220 | $status = $snmp_status{$out->{amperageProbeStatus}}; | |
3221 | $reading = $amp_type{$out->{amperageProbeType}} eq 'amperageProbeTypeIsDiscrete' | |
3222 | ? $amp_discrete{$out->{amperageProbeDiscreteReading}} | |
3223 | : $out->{amperageProbeReading}; | |
3224 | $location = $out->{amperageProbeLocationName}; | |
3225 | $max_crit = exists $out->{amperageProbeUpperCriticalThreshold} | |
3226 | ? $out->{amperageProbeUpperCriticalThreshold} : 0; | |
3227 | $max_warn = exists $out->{amperageProbeUpperNonCriticalThreshold} | |
3228 | ? $out->{amperageProbeUpperNonCriticalThreshold} : 0; | |
3229 | $unit = exists $amp_unit{$amp_type{$out->{amperageProbeType}}} | |
3230 | ? $amp_unit{$amp_type{$out->{amperageProbeType}}} : 'mA'; | |
3231 | if ($unit eq 'hA') { | |
3232 | $reading /= 10; | |
3233 | $max_crit /= 10; | |
3234 | $max_warn /= 10; | |
3235 | $unit = 'A'; | |
3236 | } | |
3237 | } | |
3238 | else { | |
3239 | $index = $out->{'Index'}; | |
0be00f80 | 3240 | next AMP if (!defined $index || $index !~ m/^\d+$/x); |
669797e1 | 3241 | $status = $out->{'Status'}; |
3242 | $reading = $out->{'Reading'}; | |
3243 | $location = $out->{'Probe Name'}; | |
3244 | $max_crit = $out->{'Failure Threshold'} ne '[N/A]' | |
3245 | ? $out->{'Failure Threshold'} : 0; | |
3246 | $max_warn = $out->{'Warning Threshold'} ne '[N/A]' | |
3247 | ? $out->{'Warning Threshold'} : 0; | |
3248 | $reading =~ s{\A (\d+.*?)\s+([a-zA-Z]+) \s*\z}{$1}xms; | |
3249 | $unit = $2; | |
3250 | $max_warn =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms; | |
3251 | $max_crit =~ s{\A (\d+.*?)\s+[a-zA-Z]+ \s*\z}{$1}xms; | |
3252 | } | |
3253 | ||
78dbab97 | 3254 | next AMP if blacklisted('amp', $index); |
669797e1 | 3255 | next AMP if $index !~ m{\A \d+ \z}xms; |
3256 | $count{amp}++; | |
3257 | ||
98b224a3 | 3258 | my $msg = sprintf 'Amperage probe %d [%s] reads %s %s', |
669797e1 | 3259 | $index, $location, $reading, $unit, $status; |
3260 | report('chassis', $msg, $status2nagios{$status}, $index); | |
3261 | ||
3262 | # Collect performance data | |
3263 | if (defined $opt{perfdata}) { | |
3264 | next AMP if $reading !~ m{\A \d+(\.\d+)? \z}xms; # discrete reading (not number) | |
3265 | my $pname = lc $location; | |
3266 | $pname =~ s{\s}{_}gxms; | |
3267 | my $pkey = join q{_}, 'pwr_mon', $index, $pname; | |
3268 | my $pval = join q{;}, "$reading$unit", $max_warn, $max_crit; | |
3269 | $perfdata{$pkey} = $pval; | |
3270 | } | |
3271 | } | |
3272 | ||
3273 | # Collect EXTRA performance data not found at first run. This is a | |
3274 | # rather ugly hack | |
3275 | if (defined $opt{perfdata} && !$snmp) { | |
3276 | my $found = 0; | |
3277 | my $index = 0; | |
3278 | my %used = (); | |
3279 | ||
3280 | # find used indexes | |
3281 | foreach (keys %perfdata) { | |
3282 | if (m/\A pwr_mon_(\d+)/xms) { | |
3283 | $used{$1} = 1; | |
3284 | } | |
3285 | } | |
3286 | ||
3287 | AMP2: | |
3288 | foreach my $line (@{ run_command("$omreport $omopt_chassis pwrmonitoring -fmt ssv") }) { | |
3289 | chop $line; | |
3290 | if ($line eq 'Location;Reading') { | |
3291 | $found = 1; | |
3292 | next AMP2; | |
3293 | } | |
3294 | if ($line eq q{}) { | |
3295 | $found = 0; | |
3296 | next AMP2; | |
3297 | } | |
3298 | if ($found and $line =~ m/\A ([^;]+?) ; (\d*\.\d+) \s ([AW]) \z/xms) { | |
3299 | my $aname = lc $1; | |
3300 | my $aval = $2; | |
3301 | my $aunit = $3; | |
3302 | $aname =~ s{\s}{_}gxms; | |
3303 | ||
3304 | # don't use an existing index | |
3305 | while (exists $used{$index}) { ++$index; } | |
3306 | ||
3307 | $perfdata{"pwr_mon_${index}_${aname}"} = "$aval$aunit;0;0"; | |
3308 | ++$index; | |
3309 | } | |
3310 | } | |
3311 | } | |
3312 | ||
3313 | return; | |
3314 | } | |
3315 | ||
3316 | ||
3317 | #----------------------------------------- | |
3318 | # CHASSIS: Check intrusion | |
3319 | #----------------------------------------- | |
sub check_intrusion {
    # Report the state of every chassis intrusion probe. Probe data
    # comes from SNMP when $snmp is set, otherwise from
    # 'omreport chassis intrusion'. A probe whose status is not 'Ok'
    # is reported as a warning.

    # Textual meaning of the numeric intrusionReading values
    my %int_reading
      = (
         1 => 'Not Breached',          # chassis not breached and no uncleared breaches
         2 => 'Breached',              # chassis currently breached
         3 => 'Breached Prior',        # chassis breached prior to boot and has not been cleared
         4 => 'Breach Sensor Failure', # intrusion sensor has failed
        );

    my @probes = ();

    if ($snmp) {
        my %int_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.70.1.2.1' => 'intrusionIndex',
             '1.3.6.1.4.1.674.10892.1.300.70.1.5.1' => 'intrusionStatus',
             '1.3.6.1.4.1.674.10892.1.300.70.1.6.1' => 'intrusionReading',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %int_oid]);

        # A system without intrusion probes is fine
        return 0 if !defined $result;

        @probes = @{ get_snmp_output($result, \%int_oid) };
    }
    else {
        @probes = @{ run_omreport("$omopt_chassis intrusion") };
    }

  INTRUSION:
    foreach my $probe (@probes) {
        my ($index, $status, $reading);

        if ($snmp) {
            # SNMP indexes are 1-based, omreport indexes are 0-based
            $index   = $probe->{intrusionIndex} - 1;
            $status  = $snmp_status{ $probe->{intrusionStatus} };
            $reading = $int_reading{ $probe->{intrusionReading} };
        }
        else {
            ($index, $status, $reading) = @{$probe}{ 'Index', 'Status', 'State' };
        }

        next INTRUSION if blacklisted('intr', $index);
        $count{intr}++;

        if ($status eq 'Ok') {
            report('chassis',
                   sprintf('Chassis intrusion %d detection: %s (%s)',
                           $index, $status, $reading),
                   $E_OK, $index);
        }
        else {
            report('chassis',
                   sprintf('Chassis intrusion %d detected: %s',
                           $index, $reading),
                   $E_WARNING, $index);
        }
    }

    return;
}
3382 | ||
3383 | ||
3384 | #----------------------------------------- | |
3385 | # CHASSIS: Check alert log | |
3386 | #----------------------------------------- | |
sub check_alertlog {
    # Tally the entries of the OMSA alert log by severity and report
    # the totals. Does nothing when running via SNMP.
    return if $snmp;    # Not supported with SNMP

    # Count log entries per severity
    foreach my $entry (@{ run_omreport("$omopt_system alertlog") }) {
        $count{alert}{ $entry->{Severity} }++;
    }

    # Any critical entry wins over non-critical ones
    my $err = $count{alert}{'Critical'}     > 0 ? $E_CRITICAL
            : $count{alert}{'Non-Critical'} > 0 ? $E_WARNING
            :                                     0;

    report('other',
           sprintf('Alert log content: %d critical, %d non-critical, %d ok',
                   $count{alert}{'Critical'},
                   $count{alert}{'Non-Critical'},
                   $count{alert}{'Ok'}),
           $err);

    return;
}
3406 | ||
3407 | #----------------------------------------- | |
3408 | # CHASSIS: Check ESM log overall health | |
3409 | #----------------------------------------- | |
sub check_esmlog_health {
    # Report the overall health (fill grade) of the ESM log. The
    # health value is read from SNMP when $snmp is set, otherwise
    # from the 'Health' line of 'omreport system esmlog -fmt ssv'.
    # If the health cannot be fetched via SNMP, an UNKNOWN message
    # is reported and nothing else.
    my $health = 'Ok';

    if ($snmp) {
        my $systemStateEventLogStatus = '1.3.6.1.4.1.674.10892.1.200.10.1.41.1';
        my $result = $snmp_session->get_request(-varbindlist => [$systemStateEventLogStatus]);
        if (!defined $result) {
            my $msg = sprintf 'SNMP ERROR [esmhealth]: %s',
              $snmp_session->error;
            report('other', $msg, $E_UNKNOWN);

            # Without a result there is no health value to evaluate.
            # Carrying on would read through an undefined reference
            # and report a bogus "more than 80% full" message.
            return;
        }
        $health = $snmp_status{$result->{$systemStateEventLogStatus}};
    }
    else {
        foreach (@{ run_command("$omreport $omopt_system esmlog -fmt ssv") }) {
            if (m/\A Health;(.+) \z/xms) {
                $health = $1;
                $health =~ s{\s+\z}{}xms;  # strip line ending / trailing whitespace
                last;
            }
        }
    }

    # If the overall health of the ESM log is other than "Ok", the
    # fill grade of the log is more than 80% and the log should be
    # cleared
    if ($health eq 'Ok') {
        my $msg = sprintf 'ESM log health is Ok (less than 80%% full)';
        report('other', $msg, $E_OK);
    }
    elsif ($health eq 'Critical') {
        my $msg = sprintf 'ESM log is 100%% full';
        report('other', $msg, $status2nagios{$health});
    }
    else {
        my $msg = sprintf 'ESM log is more than 80%% full';
        report('other', $msg, $status2nagios{$health});
    }

    return;
}
3451 | ||
3452 | #----------------------------------------- | |
3453 | # CHASSIS: Check ESM log | |
3454 | #----------------------------------------- | |
sub check_esmlog {
    # Tally the entries of the ESM (hardware) log by severity and
    # report the totals. Severities come from SNMP when $snmp is set,
    # otherwise from 'omreport system esmlog'.
    my @severities = ();

    if ($snmp) {
        my %esm_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.40.1.7.1' => 'eventLogSeverityStatus',
            );
        my $result = $snmp_session->get_entries(-columns => [keys %esm_oid]);

        # An empty log is fine, nothing to count or report
        return if !defined $result;

        # Translate numeric SNMP status codes to severity names
        foreach my $entry (@{ get_snmp_output($result, \%esm_oid) }) {
            push @severities, $snmp_status{ $entry->{eventLogSeverityStatus} };
        }
    }
    else {
        foreach my $entry (@{ run_omreport("$omopt_system esmlog") }) {
            push @severities, $entry->{Severity};
        }
    }

    foreach my $severity (@severities) {
        $count{esm}{$severity}++;
    }

    # Any critical entry wins over non-critical ones
    my $err = $count{esm}{'Critical'}     > 0 ? $E_CRITICAL
            : $count{esm}{'Non-Critical'} > 0 ? $E_WARNING
            :                                   0;

    report('other',
           sprintf('ESM log content: %d critical, %d non-critical, %d ok',
                   $count{esm}{'Critical'},
                   $count{esm}{'Non-Critical'},
                   $count{esm}{'Ok'}),
           $err);

    return;
}
3491 | ||
3492 | # | |
3493 | # Handy function for checking all storage components | |
3494 | # | |
sub check_storage {
    # Run every storage related check, in a fixed order.
    my @storage_checks
      = (
         \&check_controllers,
         \&check_physical_disks,
         \&check_virtual_disks,
         \&check_cache_battery,
         \&check_connectors,
         \&check_enclosures,
         \&check_enclosure_fans,
         \&check_enclosure_pwr,
         \&check_enclosure_temp,
         \&check_enclosure_emms,
        );

    foreach my $storage_check (@storage_checks) {
        $storage_check->();
    }

    return;
}
3508 | ||
3509 | ||
3510 | ||
3511 | #--------------------------------------------------------------------- | |
3512 | # Info functions | |
3513 | #--------------------------------------------------------------------- | |
3514 | ||
3515 | # | |
3516 | # Fetch output from 'omreport chassis info', put in sysinfo hash | |
3517 | # | |
sub get_omreport_chassis_info {
    # Run 'omreport chassis info' and store the chassis model and
    # service tag in %sysinfo. If the command cannot be started,
    # %sysinfo is left untouched.

    # omreport field name => %sysinfo key
    my %sysinfo_key_for
      = (
         'Chassis Model'       => 'model',
         'Model'               => 'model',
         'Chassis Service Tag' => 'serial',
         'Service Tag'         => 'serial',
        );

    open my $INFO, '-|', "$omreport $omopt_chassis info -fmt ssv"
      or return;
    my @lines = <$INFO>;
    close $INFO;

  LINE:
    foreach my $line (@lines) {
        next LINE
          if $line !~ m/\A (Chassis\sModel|Chassis\sService\sTag|Model|Service\sTag)/xms;

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if (exists $sysinfo_key_for{$key}) {
            $sysinfo{ $sysinfo_key_for{$key} } = $val;
        }
    }

    return;
}
3537 | ||
3538 | # | |
3539 | # Fetch output from 'omreport chassis bios', put in sysinfo hash | |
3540 | # | |
sub get_omreport_chassis_bios {
    # Run 'omreport chassis bios' and store the BIOS version and
    # release date in %sysinfo. If the command cannot be started,
    # %sysinfo is left untouched.
    open my $BIOS, '-|', "$omreport $omopt_chassis bios -fmt ssv"
      or return;
    my @lines = <$BIOS>;
    close $BIOS;

  LINE:
    foreach my $line (@lines) {
        next LINE if $line !~ m/;/xms;  # not a name;value line

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if ($key eq 'Version') {
            $sysinfo{bios} = $val;
        }
        elsif ($key eq 'Release Date') {
            $sysinfo{biosdate} = $val;
        }
    }

    return;
}
3556 | ||
3557 | # | |
3558 | # Fetch output from 'omreport system operatingsystem', put in sysinfo hash | |
3559 | # | |
sub get_omreport_system_operatingsystem {
    # Run 'omreport system operatingsystem' and store the OS name
    # and OS version in %sysinfo. If the command cannot be started,
    # %sysinfo is left untouched.

    # omreport field name => %sysinfo key
    my %sysinfo_key_for
      = (
         'Operating System'         => 'osname',
         'Operating System Version' => 'osver',
        );

    open my $VER, '-|', "$omreport $omopt_system operatingsystem -fmt ssv"
      or return;
    my @lines = <$VER>;
    close $VER;

  LINE:
    foreach my $line (@lines) {
        next LINE if $line !~ m/;/xms;  # not a name;value line

        my ($key, $val) = split /;/xms, $line;
        $key =~ s{\s+\z}{}xms;  # remove trailing whitespace
        $val =~ s{\s+\z}{}xms;  # remove trailing whitespace

        if (exists $sysinfo_key_for{$key}) {
            $sysinfo{ $sysinfo_key_for{$key} } = $val;
        }
    }

    return;
}
3579 | ||
3580 | # | |
3581 | # Fetch output from 'omreport about', put in sysinfo hash | |
3582 | # | |
sub get_omreport_about {
    # Run 'omreport about' and store the OMSA version in
    # $sysinfo{om}. If the command cannot be started, %sysinfo is
    # left untouched.
    open my $OM, '-|', "$omreport about -fmt ssv"
      or return;
    my @lines = <$OM>;
    close $OM;

  LINE:
    foreach my $line (@lines) {
        my ($version) = $line =~ m/\A Version;(.+) \z/xms
          or next LINE;
        chomp $version;
        $sysinfo{om} = $version;
    }

    return;
}
3596 | ||
3597 | # | |
3598 | # Fetch chassis info via SNMP, put in sysinfo hash | |
3599 | # | |
sub get_snmp_chassis_info {
    # Fetch the chassis information table via SNMP and store the
    # chassis model and service tag in %sysinfo. An SNMP failure is
    # reported as UNKNOWN.
    my $chassisInformationTable = '1.3.6.1.4.1.674.10892.1.300.10.1';
    my $chassisModelName        = '1.3.6.1.4.1.674.10892.1.300.10.1.9.1';
    my $chassisServiceTagName   = '1.3.6.1.4.1.674.10892.1.300.10.1.11.1';

    my $result = $snmp_session->get_table(-baseoid => $chassisInformationTable);

    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting chassis info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $result->{$chassisModelName}) {
        $sysinfo{model} = $result->{$chassisModelName};
        $sysinfo{model} =~ s{\s+\z}{}xms;  # remove trailing whitespace
    }
    if (exists $result->{$chassisServiceTagName}) {
        $sysinfo{serial} = $result->{$chassisServiceTagName};
    }

    return;
}
3628 | ||
3629 | # | |
3630 | # Fetch BIOS info via SNMP, put in sysinfo hash | |
3631 | # | |
sub get_snmp_chassis_bios {
    # Fetch the system BIOS table via SNMP and store the BIOS
    # version and release date in %sysinfo. An SNMP failure is
    # reported as UNKNOWN.
    my $systemBIOSTable           = '1.3.6.1.4.1.674.10892.1.300.50.1';
    my $systemBIOSReleaseDateName = '1.3.6.1.4.1.674.10892.1.300.50.1.7.1.1';
    my $systemBIOSVersionName     = '1.3.6.1.4.1.674.10892.1.300.50.1.8.1.1';

    my $result = $snmp_session->get_table(-baseoid => $systemBIOSTable);

    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting BIOS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $result->{$systemBIOSReleaseDateName}) {
        $sysinfo{biosdate} = $result->{$systemBIOSReleaseDateName};

        # Rearrange a leading YYYYMMDD into MM/DD/YYYY, dropping the rest
        $sysinfo{biosdate} =~ s{\A (\d{4})(\d{2})(\d{2}).*}{$2/$3/$1}xms;
    }
    if (exists $result->{$systemBIOSVersionName}) {
        $sysinfo{bios} = $result->{$systemBIOSVersionName};
    }

    return;
}
3660 | ||
3661 | # | |
3662 | # Fetch OS info via SNMP, put in sysinfo hash | |
3663 | # | |
sub get_snmp_system_operatingsystem {
    # Fetch the operating system table via SNMP and store the OS
    # name and OS version in %sysinfo. An SNMP failure is reported
    # as UNKNOWN.

    # OID => %sysinfo key
    my %sysinfo_key_for
      = (
         '1.3.6.1.4.1.674.10892.1.400.10.1.6.1' => 'osname',  # operatingSystemOperatingSystemName
         '1.3.6.1.4.1.674.10892.1.400.10.1.7.1' => 'osver',   # operatingSystemOperatingSystemVersionName
        );

    my $operatingSystemTable = '1.3.6.1.4.1.674.10892.1.400.10.1';
    my $result = $snmp_session->get_table(-baseoid => $operatingSystemTable);

    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting OS info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

  OID:
    foreach my $oid (keys %sysinfo_key_for) {
        next OID if !exists $result->{$oid};
        $sysinfo{ $sysinfo_key_for{$oid} } = $result->{$oid};
    }

    return;
}
3691 | ||
3692 | # | |
3693 | # Fetch OMSA version via SNMP, put in sysinfo hash | |
3694 | # | |
sub get_snmp_about {
    # Fetch the system management software group via SNMP and store
    # the OMSA version in $sysinfo{om}. An SNMP failure is reported
    # as UNKNOWN.
    my $systemManagementSoftwareGroup             = '1.3.6.1.4.1.674.10892.1.100';
    my $systemManagementSoftwareGlobalVersionName = '1.3.6.1.4.1.674.10892.1.100.10.0';

    my $result = $snmp_session->get_table(-baseoid => $systemManagementSoftwareGroup);

    if (!defined $result) {
        my $msg = sprintf 'SNMP ERROR getting OMSA info: %s',
          $snmp_session->error;
        report('other', $msg, $E_UNKNOWN);
        return;
    }

    if (exists $result->{$systemManagementSoftwareGlobalVersionName}) {
        $sysinfo{om} = $result->{$systemManagementSoftwareGlobalVersionName};
    }

    return;
}
3716 | ||
3717 | # | |
3718 | # Collects some information about the system | |
3719 | # | |
sub get_sysinfo
{
    # Fill %sysinfo with the pieces of system information that the
    # selected options need. Each piece is fetched via SNMP when
    # $snmp is set, otherwise via omreport.

    # System model and serial number are always fetched
    if ($snmp) { get_snmp_chassis_info(); }
    else       { get_omreport_chassis_info(); }

    # BIOS information: wanted for okinfo >= 1, debug, or a postmsg
    # that contains %b or %d
    my $want_bios = $opt{okinfo} >= 1
      || $opt{debug}
      || (defined $opt{postmsg} && $opt{postmsg} =~ m/[%][bd]/xms);
    if ($want_bios) {
        if ($snmp) { get_snmp_chassis_bios(); }
        else       { get_omreport_chassis_bios(); }
    }

    # OMSA information: wanted for okinfo >= 3 or debug
    my $want_omsa = $opt{okinfo} >= 3 || $opt{debug};
    if ($want_omsa) {
        if ($snmp) { get_snmp_about(); }
        else       { get_omreport_about(); }
    }

    # Debug output does not need anything more
    return if $opt{debug};

    # OS information: wanted only for a postmsg that contains %o or %r
    my $want_os = defined $opt{postmsg} && $opt{postmsg} =~ m/[%][or]/xms;
    if ($want_os) {
        if ($snmp) { get_snmp_system_operatingsystem(); }
        else       { get_omreport_system_operatingsystem(); }
    }

    return;
}
3747 | ||
3748 | ||
3749 | # Helper function for running omreport when the results are strictly | |
3750 | # name=value pairs. | |
sub run_omreport_info {
    # Run 'omreport <command> -fmt ssv' and return a reference to a
    # hash that maps the first field of every 'name;value' line to
    # its second field. Every line starting with 'Error' is reported
    # as UNKNOWN.
    my ($command) = @_;
    my %value_of = ();

    # Run omreport and fetch output (stderr included)
    my $rawtext = slurp_command("$omreport $command -fmt ssv 2>&1");

  LINE:
    foreach my $line (split /\n/xms, $rawtext) {
        if ($line =~ m/\A Error/xms) {
            report('other', "Problem running 'omreport $command': $line", $E_UNKNOWN);
        }

        # ignore lines with less than two fields
        next LINE if $line !~ m/;/xms;

        # Split without a limit on purpose: trailing empty fields are
        # dropped, so a line like 'name;' yields an undefined value
        my @fields = split m/;/xms, $line;
        $value_of{ $fields[0] } = $fields[1];
    }

    # Finally, return the collected information
    return \%value_of;
}
3773 | ||
3774 | # Get various firmware information (BMC, RAC) | |
sub get_firmware_info {
    # Collect BMC and RAC firmware information and store it in
    # %sysinfo (keys bmc, bmc_fw, rac, rac_name, rac_fw). The data
    # comes from the SNMP firmware table when $snmp is set, otherwise
    # from 'omreport chassis info'. Returns without touching %sysinfo
    # if the SNMP firmware table is not available.
    my @snmp_output = ();
    my %nrpe_output = ();

    if ($snmp) {
        my %fw_oid
          = (
             '1.3.6.1.4.1.674.10892.1.300.60.1.7.1'  => 'firmwareType',
             '1.3.6.1.4.1.674.10892.1.300.60.1.8.1'  => 'firmwareTypeName',
             '1.3.6.1.4.1.674.10892.1.300.60.1.11.1' => 'firmwareVersionName',
            );

        my $firmwareTable = '1.3.6.1.4.1.674.10892.1.300.60.1';
        my $result = $snmp_session->get_table(-baseoid => $firmwareTable);

        # Some don't have this OID, this is ok
        if (!defined $result) {
            return;
        }

        @snmp_output = @{ get_snmp_output($result, \%fw_oid) };
    }
    else {
        %nrpe_output = %{ run_omreport_info("$omopt_chassis info") };
    }

    my %fw_type  # Firmware types
      = (
         1  => 'other',                              # other than following values
         2  => 'unknown',                            # unknown
         3  => 'systemBIOS',                         # System BIOS
         4  => 'embeddedSystemManagementController', # Embedded System Management Controller
         5  => 'powerSupplyParallelingBoard',        # Power Supply Paralleling Board
         6  => 'systemBackPlane',                    # System (Primary) Backplane
         7  => 'powerVault2XXSKernel',               # PowerVault 2XXS Kernel
         8  => 'powerVault2XXSApplication',          # PowerVault 2XXS Application
         9  => 'frontPanel',                         # Front Panel Controller
         10 => 'baseboardManagementController',      # Baseboard Management Controller
         11 => 'hotPlugPCI',                         # Hot Plug PCI Controller
         12 => 'sensorData',                         # Sensor Data Records
         13 => 'peripheralBay',                      # Peripheral Bay Backplane
         14 => 'secondaryBackPlane',                 # Secondary Backplane for ESM 2 systems
         15 => 'secondaryBackPlaneESM3And4',         # Secondary Backplane for ESM 3 and 4 systems
         16 => 'rac',                                # Remote Access Controller
         17 => 'imc',                                # Integrated Management Controller
        );

    if ($snmp) {
      FIRMWARE:
        foreach my $out (@snmp_output) {
            # Skip entries with a missing or unknown firmware type
            # instead of comparing against an undefined value
            my $type_id = $out->{firmwareType};
            next FIRMWARE if !defined $type_id or !exists $fw_type{$type_id};
            my $type = $fw_type{$type_id};

            if ($type eq 'baseboardManagementController') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $out->{firmwareVersionName};
            }
            # The alternation is grouped so that both anchors apply to
            # both names: matches exactly 'rac' or exactly 'imc'
            elsif ($type =~ m{\A (?:rac|imc) \z}xms) {
                my $name = defined $out->{firmwareTypeName}
                  ? $out->{firmwareTypeName} : q{};
                $name =~ s/\s//gxms;  # remove all whitespace from the name
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_name'} = $name;
                $sysinfo{'rac_fw'}   = $out->{firmwareVersionName};
            }
        }
    }
    else {
      KEY:
        foreach my $key (keys %nrpe_output) {
            next KEY if !defined $nrpe_output{$key};
            if ($key eq 'BMC Version' or $key eq 'Baseboard Management Controller Version') {
                $sysinfo{'bmc'}    = 1;
                $sysinfo{'bmc_fw'} = $nrpe_output{$key};
            }
            elsif ($key =~ m{\A (i?DRAC)\s*(\d?)\s+Version}xms) {
                # e.g. 'DRAC5 Version' or 'iDRAC6 Version' => 'DRAC5' / 'iDRAC6'
                my $name = "$1$2";
                $sysinfo{'rac'}      = 1;
                $sysinfo{'rac_fw'}   = $nrpe_output{$key};
                $sysinfo{'rac_name'} = $name;
            }
        }
    }

    return;
}
3855 | ||
3856 | ||
3857 | ||
3858 | #===================================================================== | |
3859 | # Main program | |
3860 | #===================================================================== | |
3861 | ||
# Here we do the actual checking of components

# Check global status if applicable
$globalstatus = check_global() if $global;

# Run every selected check. The order of the list is the order in
# which the checks are run.
foreach my $selected (
    [ storage   => \&check_storage       ],
    [ memory    => \&check_memory        ],
    [ fans      => \&check_fans          ],
    [ power     => \&check_powersupplies ],
    [ temp      => \&check_temperatures  ],
    [ cpu       => \&check_processors    ],
    [ voltage   => \&check_volts         ],
    [ batteries => \&check_batteries     ],
    [ amperage  => \&check_pwrmonitoring ],
    [ intrusion => \&check_intrusion     ],
    [ alertlog  => \&check_alertlog      ],
    [ esmlog    => \&check_esmlog        ],
    [ esmhealth => \&check_esmlog_health ],
) {
    my ($check_name, $check_sub) = @{$selected};
    if ($check{$check_name}) {
        $check_sub->();
    }
}
3882 | ||
3883 | ||
3884 | #--------------------------------------------------------------------- | |
3885 | # Finish up | |
3886 | #--------------------------------------------------------------------- | |
3887 | ||
# Number of messages per nagios level, all starting at zero
%nagios_alert_count = map { $_ => 0 } qw(OK WARNING CRITICAL UNKNOWN);

# Get system information
get_sysinfo();

# Get firmware info if requested via option
get_firmware_info() if $opt{okinfo} >= 1;

# Close SNMP session
$snmp_session->close if $snmp;
3909 | ||
# Print messages. In debug mode every collected message is printed in
# a table per category. Otherwise only the alerts (non-OK messages)
# are printed, most severe level first. In both modes the number of
# printed messages per nagios level is tallied in %nagios_alert_count.
if ($opt{debug}) {
    print " System: $sysinfo{model}\n";
    print " ServiceTag: $sysinfo{serial}";
    print q{ } x (25 - length $sysinfo{serial}), "OMSA version: $sysinfo{om}\n";
    print " BIOS/date: $sysinfo{bios} $sysinfo{biosdate}";
    print q{ } x (25 - length "$sysinfo{bios} $sysinfo{biosdate}"), "Plugin version: $VERSION\n";
    if ($#report_storage >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Storage Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+----------+--------------------------------------------------------\n";
        foreach my $entry (@report_storage) {
            my ($msg, $level, $nexus) = @{$entry};

            # Right-align state and ID in 8 character wide columns
            print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
              . q{ } x (8 - length $nexus) . "$nexus | $msg\n";
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if ($#report_chassis >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Chassis Components \n";
        print "=============================================================================\n";
        print " STATE | ID | MESSAGE TEXT \n";
        print "---------+------+------------------------------------------------------------\n";
        foreach my $entry (@report_chassis) {
            my ($msg, $level, $nexus) = @{$entry};

            # Right-align state (8 characters) and ID (4 characters)
            print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | "
              . q{ } x (4 - length $nexus) . "$nexus | $msg\n";
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
    if ($#report_other >= 0) {
        print "-----------------------------------------------------------------------------\n";
        print " Other messages \n";
        print "=============================================================================\n";
        print " STATE | MESSAGE TEXT \n";
        print "---------+-------------------------------------------------------------------\n";
        foreach my $entry (@report_other) {
            my ($msg, $level, $nexus) = @{$entry};
            print q{ } x (8 - length $reverse_exitcode{$level}) . "$reverse_exitcode{$level} | $msg\n";
            $nagios_alert_count{$reverse_exitcode{$level}}++;
        }
    }
}
else {
    my $c = 0;  # counter to determine linebreaks

    # Run through each message, sorted by severity level (highest
    # level first). The comparison must return -1/0/1, hence <=>; a
    # boolean '<' is not a valid sort comparator.
  ALERT:
    foreach my $entry (sort { $b->[1] <=> $a->[1] }
                       (@report_storage, @report_chassis, @report_other)) {
        my ($msg, $level, $nexus) = @{$entry};
        next ALERT if $level == $E_OK;

        if (defined $opt{only}) {
            # If user wants only critical alerts
            next ALERT if ($opt{only} eq 'critical' and $level == $E_WARNING);

            # If user wants only warning alerts
            next ALERT if ($opt{only} eq 'warning' and $level == $E_CRITICAL);
        }

        # Prefix with service tag if specified with option '-i|--info'
        if ($opt{info}) {
            if (defined $opt{htmlinfo}) {
                $msg = '[<a href="' . warranty_url($sysinfo{serial})
                  . "\">$sysinfo{serial}</a>] " . $msg;
            }
            else {
                $msg = "[$sysinfo{serial}] " . $msg;
            }
        }

        # Prefix with nagios level if specified with option '--state'
        $msg = $reverse_exitcode{$level} . ": $msg" if $opt{state};

        # Prefix with one-letter nagios level if specified with option '--short-state'
        $msg = (substr $reverse_exitcode{$level}, 0, 1) . ": $msg" if $opt{shortstate};

        # Separate messages with a linebreak, but don't start with one
        if ($c++ == 0) {
            print $msg;
        }
        else {
            print $linebreak, $msg;
        }

        $nagios_alert_count{$reverse_exitcode{$level}}++;
    }
}
3995 | ||
# Determine our exit code. Critical takes precedence over warning,
# which takes precedence over unknown.
$exit_code
  = $nagios_alert_count{'CRITICAL'} > 0 ? $E_CRITICAL
  : $nagios_alert_count{'WARNING'}  > 0 ? $E_WARNING
  : $nagios_alert_count{'UNKNOWN'}  > 0 ? $E_UNKNOWN
  :                                       $E_OK;

# Global status via SNMP.. extra safety check: the global status says
# something is wrong, but no component check found anything
if ($globalstatus != $E_OK && $exit_code == $E_OK && !defined $opt{only}) {
    print "OOPS! Something is wrong with this server, but I don't know what. "
      . "The global system health status is $reverse_exitcode{$globalstatus}, "
      . "but every component check is OK. This may be a bug in the Nagios plugin, "
      . "please file a bug report.\n";
    exit $E_UNKNOWN;
}
4010 | ||
# Print the status message. Three cases are handled:
#
#   1. The exit code is OK, $opt{only} selects a single component check
#      (anything but 'critical', 'warning' or 'chassis') and debug is
#      off: print a component specific OK message.
#   2. The exit code is OK and debug is off: print the generic OK
#      message, followed by extra information depending on the value
#      of $opt{okinfo}.
#   3. Otherwise: print the optional system info line ($opt{extinfo})
#      and the optional post message ($opt{postmsg}).
#
# The alternation in the regex is grouped on purpose. Without the
# group, \A only binds to 'critical' and \z only binds to 'chassis',
# so e.g. any string merely containing 'warning' would match.
if (   $exit_code == $E_OK
    && defined $opt{only}
    && $opt{only} !~ m{\A (?:critical|warning|chassis) \z}xms
    && !$opt{debug} )
{
    my %okmsg
      = ( 'storage'   => "STORAGE OK - $count{pdisk} physical drives, $count{vdisk} logical drives",
          'fans'      => $count{fan} == 0 && $blade ? 'OK - blade system with no fan probes' : "FANS OK - $count{fan} fan probes checked",
          'temp'      => "TEMPERATURES OK - $count{temp} temperature probes checked",
          'memory'    => "MEMORY OK - $count{dimm} memory modules checked",
          'power'     => $count{power} == 0 ? 'OK - no instrumented power supplies found' : "POWER OK - $count{power} power supplies checked",
          'cpu'       => "PROCESSORS OK - $count{cpu} processors checked",
          'voltage'   => "VOLTAGE OK - $count{volt} voltage probes checked",
          'batteries' => $count{bat} == 0 ? 'OK - no batteries found' : "BATTERIES OK - $count{bat} batteries checked",
          'amperage'  => $count{amp} == 0 ? 'OK - no power monitoring probes found' : "AMPERAGE OK - $count{amp} amperage (power monitoring) probes checked",
          'intrusion' => $count{intr} == 0 ? 'OK - no intrusion detection probes found' : "INTRUSION OK - $count{intr} intrusion detection probes checked",
          'alertlog'  => $snmp ? 'OK - not supported via snmp' : "OK - Alert Log content: $count{alert}{Ok} ok, $count{alert}{'Non-Critical'} warning and $count{alert}{Critical} critical",
          'esmlog'    => "OK - ESM Log content: $count{esm}{Ok} ok, $count{esm}{'Non-Critical'} warning and $count{esm}{Critical} critical",
          'esmhealth' => "ESM LOG OK - less than 80% used",
        );

    print $okmsg{$opt{only}};
}
elsif ($exit_code == $E_OK && !$opt{debug}) {
    # Headline: model and serial number, as HTML links if requested
    if (defined $opt{htmlinfo}) {
        printf q{OK - System: '<a href="%s">%s</a>', SN: '<a href="%s">%s</a>', hardware working fine},
          documentation_url($sysinfo{model}), $sysinfo{model},
          warranty_url($sysinfo{serial}), $sysinfo{serial};
    }
    else {
        printf q{OK - System: '%s', SN: '%s', hardware working fine},
          $sysinfo{model}, $sysinfo{serial};
    }

    # Storage summary
    if ($check{storage}) {
        printf ', %d logical drives, %d physical drives',
          $count{vdisk}, $count{pdisk};
    }
    else {
        print ', not checking storage';
    }

    # okinfo level 1: BIOS and remote access controller / BMC firmware
    if ($opt{okinfo} >= 1) {
        print $linebreak;
        printf q{----- BIOS='%s %s'}, $sysinfo{bios}, $sysinfo{biosdate};

        if ($sysinfo{rac}) {
            printf q{, %s='%s'}, $sysinfo{rac_name}, $sysinfo{rac_fw};
        }
        if ($sysinfo{bmc}) {
            printf q{, BMC='%s'}, $sysinfo{bmc_fw};
        }
    }

    # okinfo level 2: one line per storage controller and enclosure
    if ($opt{okinfo} >= 2) {
        if ($check{storage}) {
            my @storageprint = ();
            foreach my $id (sort keys %{ $sysinfo{controller} }) {
                chomp $sysinfo{controller}{$id}{driver};
                my $msg = sprintf q{----- Ctrl %s [%s]: Fw='%s', Dr='%s'},
                  $sysinfo{controller}{$id}{id}, $sysinfo{controller}{$id}{name},
                  $sysinfo{controller}{$id}{firmware}, $sysinfo{controller}{$id}{driver};
                if (defined $sysinfo{controller}{$id}{storport}) {
                    $msg .= sprintf q{, Storport: '%s'}, $sysinfo{controller}{$id}{storport};
                }
                push @storageprint, $msg;
            }
            foreach my $id (sort keys %{ $sysinfo{enclosure} }) {
                push @storageprint, sprintf q{----- Encl %s [%s]: Fw='%s'},
                  $sysinfo{enclosure}{$id}->{id}, $sysinfo{enclosure}{$id}->{name},
                  $sysinfo{enclosure}{$id}->{firmware};
            }

            # print stuff
            foreach my $line (@storageprint) {
                print $linebreak, $line;
            }
        }
    }

    # okinfo level 3: OMSA version
    if ($opt{okinfo} >= 3) {
        print "$linebreak----- OpenManage Server Administrator (OMSA) version: '$sysinfo{om}'";
    }

}
else {
    # Optional trailing line with model and serial number
    if ($opt{extinfo}) {
        print $linebreak;
        if (defined $opt{htmlinfo}) {
            printf '------ SYSTEM: <a href="%s">%s</a>, SN: <a href="%s">%s</a>',
              documentation_url($sysinfo{model}), $sysinfo{model},
              warranty_url($sysinfo{serial}), $sysinfo{serial};
        }
        else {
            printf '------ SYSTEM: %s, SN: %s',
              $sysinfo{model}, $sysinfo{serial};
        }
    }

    # Optional post message, given either literally or as the name of a
    # regular file whose first line holds the message
    if (defined $opt{postmsg}) {
        my $post = undef;
        if (-f $opt{postmsg}) {
            my $POST;
            if (!open $POST, '<', $opt{postmsg}) {
                # Save the error before printing, and exit
                # unconditionally (not chained on the result of print)
                my $open_error = $!;
                print $linebreak;
                print "ERROR: Couldn't open post message file $opt{postmsg}: $open_error\n";
                exit $E_UNKNOWN;
            }
            $post = <$POST>;
            close $POST;

            # An empty file yields undef, which must not be chomped
            chomp $post if defined $post;
        }
        else {
            $post = $opt{postmsg};
        }
        if (defined $post) {
            # Expand all placeholders in a single pass, so that the
            # output of one expansion is never expanded again and
            # '%%' reliably produces a literal percent sign
            my %expansion
              = ( 's' => $sysinfo{serial},
                  'm' => $sysinfo{model},
                  'b' => $sysinfo{bios},
                  'd' => $sysinfo{biosdate},
                  'o' => $sysinfo{osname},
                  'r' => $sysinfo{osver},
                  'p' => $count{pdisk},
                  'l' => $count{vdisk},
                  'n' => $linebreak,
                  '%' => q{%},
                );

            print $linebreak;
            $post =~ s{ [%] ([smbdorpln%]) }{$expansion{$1}}gxms;
            print $post;
        }
    }
}
4136 | ||
# Print any perl warnings that have occurred. Each element of
# @perl_warnings is an array reference holding the message parts
# (presumably collected by a custom __WARN__ handler installed earlier
# in the script). Any collected warning forces the exit code to UNKNOWN.
if (@perl_warnings) {
    foreach my $warning (@perl_warnings) {
        # Use chomp rather than chop: only a trailing newline is
        # stripped, a message without one keeps its last character
        chomp @{$warning};
        print "${linebreak}INTERNAL ERROR: @{$warning}";
    }
    $exit_code = $E_UNKNOWN;
}

# Reset the WARN signal
$SIG{__WARN__} = $original_sigwarn;
4148 | ||
# Sort routine for performance data labels, for use with sort().
#
# Labels are grouped by the part before the first underscore, in the
# order fan, pwr, temp, enclosure. Labels with any other prefix sort
# before these groups. Within a group the labels are sorted as strings.
#
# The group ranks are compared numerically, and a label with an unknown
# prefix gets an explicit rank instead of an undefined one, so that no
# "uninitialized value" warnings are emitted while sorting.
sub perfdata {
    my %order
      = (
         fan       => 0,
         pwr       => 1,
         temp      => 2,
         enclosure => 3,
        );

    my @rank = ();
    foreach my $label ($a, $b) {
        my ($prefix) = split /_/, $label, 2;
        push @rank, defined $prefix && exists $order{$prefix} ? $order{$prefix} : -1;
    }

    my $result = ($rank[0] <=> $rank[1]) || ($a cmp $b);
    return $result;
}

# Print performance data: a pipe character followed by 'label'=value
# pairs, separated by a newline if $opt{perfdata} is 'multiline' and by
# a space otherwise
if (defined $opt{perfdata} && !$opt{debug} && %perfdata) {
    my $lb = $opt{perfdata} eq 'multiline' ? "\n" : q{ };  # line break for perfdata
    print q{|};
    print join $lb, map { "'$_'=$perfdata{$_}" } sort perfdata keys %perfdata;
}
e133d101 | 4167 | |
# Terminate the output with a newline, except when debug is enabled
unless ($opt{debug}) {
    print "\n";
}

# Leave the program, using the computed status as the exit code
exit $exit_code;