diff options
author | Subhendu Ghosh <sghosh@users.sourceforge.net> | 2003-02-09 14:16:29 +0000 |
---|---|---|
committer | Subhendu Ghosh <sghosh@users.sourceforge.net> | 2003-02-09 14:16:29 +0000 |
commit | 07fe1d77c03173f0291da02360a806260542b559 (patch) | |
tree | fdd11279ed05b38759afef2262f92b51cc091ba3 /contrib/check_remote_nagios_status.pl | |
parent | d4f25e47a0b89bdbcee8172fd2c0be8bf3b7f112 (diff) | |
download | monitoring-plugins-07fe1d77c03173f0291da02360a806260542b559.tar.gz |
more contribs
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@300 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'contrib/check_remote_nagios_status.pl')
-rw-r--r-- | contrib/check_remote_nagios_status.pl | 666 |
1 files changed, 666 insertions, 0 deletions
diff --git a/contrib/check_remote_nagios_status.pl b/contrib/check_remote_nagios_status.pl new file mode 100644 index 00000000..dc99705e --- /dev/null +++ b/contrib/check_remote_nagios_status.pl | |||
@@ -0,0 +1,666 @@ | |||
1 | #!/usr/bin/perl -w | ||
2 | |||
3 | # check_status.pl Nagios Plugin - Version 1.3 | ||
4 | # Last Updated: 1/9/2003 | ||
5 | # | ||
6 | # Report any bugs/questions to Russell Scibetti at russell@quadrix.com | ||
7 | # | ||
8 | # check_status Change Log: | ||
9 | # | ||
10 | # To do for 1.4 | ||
11 | # - Better help and documentation (separate doc?) | ||
12 | # - Take argument (patterns to match) from a separate spec file | ||
13 | # | ||
14 | # New Addition to 1.3 | ||
15 | # - Added ChangeLog information and updated --help output | ||
16 | # - hostdown (hd) argument for how a service check should respond | ||
17 | # when its host is Down/Unreachable | ||
18 | # (--hostdown="ok|warning|critical|unknown") | ||
19 | # - Changed name from check_state to check_status | ||
20 | # - Set hostdown to default to OK when the argument isn't specified | ||
21 | # - Number of Hosts checked is now output in OK result | ||
22 | # | ||
23 | # Version 1.2 additions: | ||
24 | # | ||
25 | # - Added ability to handle ack'd and downtimed services differently | ||
26 | # depending on argument provided | ||
27 | # (--ack="ok|warning|critical|unknown|down|unreachable" | ||
28 | # --dt="ok|warning|critical|unknown|down|unreachable") | ||
29 | # | ||
30 | # Version 1.1 additions: | ||
31 | # | ||
32 | # - Added --host=<regex>, --servhost=<regex> to allow for specific field | ||
33 | # matching (host for matching hostname in host checks, servhost for | ||
34 | # matching the hostname in service checks, service for matching the | ||
35 | # service name in service checks) | ||
36 | # - Output the number of OK services for an OK output | ||
37 | # | ||
38 | # Version 1.0 features: | ||
39 | # | ||
40 | # - Freshness check of status.log (timestamp) | ||
41 | # - Match service or host checks | ||
42 | # - Can ignore acknowledged or downtimed services/hosts (--ack, --dt) | ||
43 | # - Can output different levels of detail dependent on # of problems | ||
44 | # - Can check for number of critical, warning, or unknowns | ||
45 | # | ||
46 | ############################################################# | ||
47 | |||
48 | use Getopt::Long; | ||
49 | use File::stat; | ||
50 | |||
# With bundling enabled, single-letter switches may be combined after one
# dash, and multi-letter names must be introduced with a double dash.
Getopt::Long::Configure('bundling');

# Each destination variable is listed once; the names separated by "|" are
# aliases for the same option.  "=s" requires a string value, ":s" makes the
# value optional, and no suffix declares a plain flag.
GetOptions(
    "V|version"     => \$version,
    "h|help"        => \$help,
    "v|verbose"     => \$verbose,
    "w|warning=s"   => \$warning,
    "c|critical=s"  => \$critical,
    "u|unknown=s"   => \$unknown,
    "p|pattern=s"   => \$pattern,
    "S|service:s"   => \$service,
    "s|status=s"    => \$status,
    "d|dir=s"       => \$dir,
    "D|details=s"   => \$details,
    "H|host:s"      => \$host,
    "f|freshness=s" => \$freshness,
    "servhost=s"    => \$servhost,
    "a|ack:s"       => \$ack,
    "dt|downtime:s" => \$dt,
    "hd|hostdown:s" => \$hdown,
    "ok"            => \$ok,
);
72 | |||
# Exit codes understood by Nagios.
my $OK       = 0;
my $WARNING  = 1;
my $CRITICAL = 2;
my $UNKNOWN  = 3;

# State names, used as hash keys and in the plugin output.
my $crit    = "CRITICAL";
my $warn    = "WARNING";
my $unk     = "UNKNOWN";
my $down    = "DOWN";
my $unreach = "UNREACHABLE";

# --help: version banner plus the full help text, then stop.
if ($help) {
    printVersion();
    printHelp();
    exitcheck($UNKNOWN);
}

# --version: version banner only, then stop.
if ($version) {
    printVersion();
    exitcheck($UNKNOWN);
}

# The status log is named either directly (-s) or through the directory
# holding it (-d); with neither there is nothing to parse.
if (!$status) {
    if (!$dir) {
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        exitcheck($UNKNOWN);
    }
    # Add the separating slash only when the directory does not end in one.
    $status = $dir;
    $status .= "/" unless $status =~ m{/$};
    $status .= "status.log";
}

# A bare --host (no value) means "every host".
if (defined $host && !$host) {
    $host = "[^\\s]*";
}

# In service mode, missing host/service filters match everything.
if (!$host && !$servhost) {
    $servhost = "[^\\s]*";
}

if (!$host && !$service) {
    $service = "[^\\s]*";
}

# --ack, --dt and --hostdown each take an optional state name.  A bare switch
# defaults to "ok".  The value must be exactly one of the known states: the
# match is anchored so that a string merely containing a state name (for
# example "book") is rejected instead of being turned into a bogus state
# later on.
foreach my $choice ( [ \$ack, "ack" ], [ \$dt, "dt" ], [ \$hdown, "hostdown" ] ) {
    my ($value, $label) = @$choice;
    next unless defined $$value;

    if (!$$value) {
        $$value = "ok";
    }
    elsif ($$value !~ /\A(?:ok|critical|warning|unknown|down|unreachable)\z/) {
        print "Invalid value for $label\n";
        exitcheck($UNKNOWN);
    }
}
156 | |||
# Problem-count limit for the most detailed output; overwritten below when
# -D is given as "much,some".
my $much_details = 0;

# Alternations matched against the state field of a status line.
my $ServiceNotOK = "CRITICAL|WARNING|UNKNOWN";
my $HostNotOK    = "DOWN|UNREACHABLE";

# Number of problems counted so far, per state.
my %numprob = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN DOWN UNREACHABLE);

# Single-number thresholds (0 means the multi-number form is in use).
my ($CritOnly, $WarnOnly, $UnkOnly) = (0, 0, 0);

# Raw pieces and per-state limits of the multi-number thresholds.
my (@wlev, @clev, @ulev);
my %warnlevel = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %critlevel = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %unklevel  = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN);
my %hostlevel = map { $_ => 0 } qw(DOWN UNREACHABLE);

# Hosts currently in scheduled downtime.
my @hostdowntime;
my $numdowntime = 0;

# Hosts currently Down/Unreachable.
my @hostdown;
my $numdown = 0;

# For hosts that are up: the time recorded on their status line.
my %hostoktimes;

# Number of status lines that matched the selection criteria.
my $nummatch = 0;

# -w, -c and -u share one grammar: a missing value means "1 problem of that
# state", a lone number is that state's own limit, and a comma-separated list
# gives limits for WARNING, CRITICAL and (optionally) UNKNOWN counts.
foreach my $threshold (
    [ $warning,  \@wlev, \%warnlevel, \$WarnOnly ],
    [ $critical, \@clev, \%critlevel, \$CritOnly ],
    [ $unknown,  \@ulev, \%unklevel,  \$UnkOnly  ],
) {
    my ($argument, $pieces, $limits, $single) = @$threshold;

    if (!$argument) {
        $$single = 1;
        next;
    }
    if ($argument !~ /,/) {
        $$single = $argument;
        next;
    }

    @$pieces = split /,/, $argument;
    $limits->{"WARNING"}  = $pieces->[0];
    $limits->{"CRITICAL"} = $pieces->[1];
    $limits->{"UNKNOWN"}  = $pieces->[2] if $pieces->[2];
}

# -f is given in minutes (default 30); keep it in seconds from here on.
$freshness = ($freshness || 30) * 60;

# How many problems have been written into each output buffer, and the
# buffers themselves ("much" = the more detailed variant).
my %ct      = map { $_ => 0 } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_ct = map { $_ => 0 } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

my %output      = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_output = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

# -D "much,some": the first number limits full detail, the second limits the
# shorter listing.  A lone number is left in $details untouched.
if ($details && $details =~ /,/) {
    my @limits = split /,/, $details;
    ($much_details, $details) = @limits[0, 1];
}
262 | |||
# Open the status log read-only.  The three-argument form keeps characters in
# the user-supplied path from being interpreted as an open mode, and the
# system error is included so a failure can be diagnosed.
open(my $sta, '<', $status)
    or die "Cannot open status file $status: $!";

# Freshness check: a log that has not been rewritten within the allowed
# window is reported as CRITICAL.
my $curr_time = time;
my $file_time = stat($sta)->mtime;    # File::stat object for the open handle

if ($curr_time - $file_time > $freshness) {
    print "State CRITICAL - Status file is stale!!!\n";
    exitcheck($CRITICAL);
}

# Walk the log line by line.  Records are ';'-separated; the numeric field
# positions used below are the ones this plugin was written against and are
# not re-validated here.
while (my $line = <$sta>) {
    chomp $line;

    if ($line =~ /^[^\s]+[\s]+HOST;/) {
        my @hdata = split /;/, $line;

        # Host mode: count matching hosts and record the ones in trouble.
        if ($host && $hdata[1] =~ /$host/) {
            $nummatch++;
            if ($hdata[2] =~ /$HostNotOK/) {
                addproblem($line, $hdata[2]);
            }
        }

        # Otherwise collect per-host facts that the service checks consult.
        else {
            if ($hdata[2] =~ /$HostNotOK/) {
                $hostdown[$numdown] = $hdata[1];
                $numdown++;
            }
            else {
                $hostoktimes{$hdata[1]} = $hdata[4];
            }
            # Field 17 other than "0" is treated as "host is in downtime".
            if ($hdata[17] ne "0") {
                $hostdowntime[$numdowntime] = $hdata[1];
                $numdowntime++;
            }
        }
    }
    elsif (!$host && $line =~ /^[^\s]+[\s]+SERVICE;/) {
        my @servdata = split /;/, $line;

        # A service is selected either by --pattern against the whole line
        # or by the host-name and service-name filters together.
        if (   ($pattern && ($line =~ /$pattern/))
            || (($servdata[1] =~ /$servhost/) && ($servdata[2] =~ /$service/)))
        {
            $nummatch++;
            # Only problems in a HARD state are counted.
            if (($servdata[5] eq "HARD") && ($servdata[3] =~ /$ServiceNotOK/)) {
                addproblem($line, $servdata[3]);
            }
        }
    }
}

close($sta);
314 | |||
# Return true when the problem count of any state has reached the limit
# configured for it in the given hash (keys WARNING, CRITICAL, UNKNOWN).
# A limit that is unset or 0 is skipped: comparing "count >= 0" would always
# succeed, which made every multi-number threshold with an omitted or zero
# entry (e.g. "-c 5,2") fire unconditionally.
sub thresholdReached {
    my ($limits) = @_;

    foreach my $state ("WARNING", "CRITICAL", "UNKNOWN") {
        my $limit = $limits->{$state};
        next if !$limit;
        return 1 if $numprob{$state} >= $limit;
    }
    return 0;
}

# Nothing in the log matched the selection criteria at all.
if ($nummatch == 0) {
    print "Nothing Matches your criteria!\n";
    exitcheck($UNKNOWN);
}

# Total number of problems for the mode in use (hosts or services).
my $total = $host
    ? $numprob{"DOWN"} + $numprob{"UNREACHABLE"}
    : $numprob{"WARNING"} + $numprob{"CRITICAL"} + $numprob{"UNKNOWN"};

my $numok = $nummatch - $total;

# Host state check: any Down/Unreachable host is CRITICAL.
if ($host) {
    if ($numprob{"DOWN"} > 0 || $numprob{"UNREACHABLE"} > 0) {
        # List the hosts only while the problem count is within -D.
        if ($details && ($total <= $details)) {
            print "State CRITICAL - $total Host Problems: $output{$down} $output{$unreach}\n";
            exitcheck($CRITICAL);
        }
        else {
            print "State CRITICAL - $numprob{$down} Hosts Down, $numprob{$unreach} Hosts Unreachable\n";
            exitcheck($CRITICAL);
        }
    }
    else {
        print "State OK - $numok Hosts Up, $total Problems\n";
        exitcheck($OK);
    }
}

# Critical defined only by the number of criticals...
elsif ($CritOnly && ($numprob{"CRITICAL"} >= $CritOnly)) {
    countAndPrint($crit, $numprob{$crit}, 0);
    exitcheck($CRITICAL);
}

# Critical defined by per-state limits (warnings, criticals, unknowns)...
elsif (!$CritOnly && thresholdReached(\%critlevel)) {
    countAndPrint($crit, $total, 1);
    exitcheck($CRITICAL);
}

# Warning defined only by the number of warnings...
elsif ($WarnOnly && ($numprob{"WARNING"} >= $WarnOnly)) {
    countAndPrint($warn, $numprob{$warn}, 0);
    exitcheck($WARNING);
}

# Warning defined by per-state limits...
elsif (!$WarnOnly && thresholdReached(\%warnlevel)) {
    countAndPrint($warn, $total, 1);
    exitcheck($WARNING);
}

# Unknown defined only by the number of unknowns...
elsif ($UnkOnly && ($numprob{"UNKNOWN"} >= $UnkOnly)) {
    countAndPrint($unk, $numprob{$unk}, 0);
    exitcheck($UNKNOWN);
}

# Unknown defined by per-state limits...
elsif (!$UnkOnly && thresholdReached(\%unklevel)) {
    countAndPrint($unk, $total, 1);
    exitcheck($UNKNOWN);
}

# Everything is OK!
else {
    print "State OK - $numok OK, $total problems\n";
    exitcheck($OK);
}
395 | |||
396 | |||
397 | |||
398 | ############################ | ||
399 | # Subroutines | ||
400 | ############################ | ||
401 | |||
# Terminate the plugin.  The requested exit code is used unless --ok was
# given, in which case the process always exits with 0.
sub exitcheck {
    my ($code) = @_;
    exit($ok ? 0 : $code);
}
411 | |||
# Print the one-line result for a service check.
#   $_[0]  state name shown after "State" (e.g. "CRITICAL")
#   $_[1]  number of problems being reported
#   $_[2]  true when the line covers all problem types, false when it covers
#          only problems of the given state
# How much is printed depends on -D: up to $much_details problems use the
# detailed buffers, up to $details the short listing, more than that only
# the counts.  Without -D just the count is printed.
sub countAndPrint {
    my ($state, $count, $alltypes) = @_;
    my $summary;

    if (!$details) {
        $summary = "$count problems";
    }
    elsif ($count <= $much_details) {
        $summary = $alltypes
            ? "$count problems: $much_output{$crit} $much_output{$warn} $much_output{$unk}"
            : "$count \L$state\E: $much_output{$state}";
    }
    elsif ($count <= $details) {
        $summary = $alltypes
            ? "$count problems: $output{$crit} $output{$warn} $output{$unk}"
            : "$count \L$state\E: $output{$state}";
    }
    else {
        $summary = $alltypes
            ? "$numprob{$crit} critical, $numprob{$warn} warning, $numprob{$unk} unknown"
            : "$count \L$state\E";
    }

    print "State $state - $summary\n";
}
451 | |||
452 | |||
# Record one problem found in the status log.
#   $_[0]  the complete status line (';'-separated)
#   $_[1]  the state read from that line (e.g. "CRITICAL" or "DOWN")
# The problem may be dropped, or counted under a different state, depending
# on --ack, --dt and --hostdown.  When it is kept, the per-state counter in
# %numprob is incremented and, if -D is in effect, a description is appended
# to the output buffers.
# NOTE(review): the numeric field positions (13, 27, 5, 17, 6, 20, 31) are
# taken as given from the original code; confirm them against the status.log
# layout in use.
sub addproblem {
    my ($line, $state) = @_;

    my $test    = 1;         # cleared when the problem must not be counted
    my $type    = $state;    # state under which the problem is counted
    my $diffout = "";        # replacement text used when the host is down

    my @values = split /;/, $line;

    if (!$host) {
        # Service problem.
        my $namehold = $values[1];

        if ($ack && ($values[13] eq "1")) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        # Host names are compared for equality: a pattern match would let
        # "web1" hit an entry for "web10" and treat dots as wildcards.
        elsif ($hdown && grep { $_ eq $namehold } @hostdown) {
            if ($hdown =~ "ok") {
                $test = 0;
            }
            else {
                $type    = "\U$hdown";
                $diffout = "$values[1] is down";
            }
        }
        elsif ($dt && (($values[27] ne "0") || (grep { $_ eq $namehold } @hostdowntime))) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
        elsif (exists $hostoktimes{$namehold}) {
            # If the state change time of the host is more recent than the
            # last service check, wait until the next service check runs.
            if ($hostoktimes{$namehold} > $values[6]) {
                $test = 0;
            }
        }
    }
    else {
        # Host problem.
        if ($ack && $values[5]) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        elsif ($dt && ($values[17] ne "0")) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
    }

    if ($details && $test) {
        if (!$host) {
            if ($diffout) {
                $much_output{$type} .= " $diffout;";
                $output{$type}      .= "$diffout;";
                $much_ct{$type}++;
                $ct{$type}++;
            }
            else {
                # Each buffer stops growing once it holds its -D limit.
                if ($much_details && $much_ct{$type} < $much_details) {
                    $much_output{$type} .= " $values[2] on $values[1] $values[31];";
                    $much_ct{$type}++;
                }
                if ($ct{$type} < $details) {
                    $output{$type} .= " $values[2] on $values[1];";
                    $ct{$type}++;
                }
            }
        }
        else {
            $much_output{$type} .= " $values[1] $state $values[20],";
            $much_ct{$type}++;    # was $much_ct{type}: counted under the literal key "type"
            $output{$type} .= " $values[1] HOST $state,";
            $ct{$type}++;
        }
    }

    if ($test) {
        $numprob{$type}++;
    }
}
546 | |||
547 | ################################ | ||
548 | # | ||
549 | # Version and Help Information | ||
550 | # | ||
551 | ################################ | ||
552 | |||
# Print the program name, plugin version and licence notice.
# The text goes through print rather than printf: it is not a format string,
# and a '%' in the program path ($0) must be printed literally.
sub printVersion {
    print <<EndVersion;
$0 (nagios-plugins) 1.3
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
EndVersion
}
561 | |||
# Print the full usage and option reference.
# The text goes through print rather than printf so that a '%' in the program
# path ($0) is printed literally.  The short switch shown for --service is -S,
# which is the one registered with GetOptions.
sub printHelp {
    print <<EOF;

This plugin parses through the Nagios status log and will return a
Critical, Warning, or Unknown state depending on the number of
Critical, Warning, and/or Unknown services found in the log
(or Down/Unreachable hosts when matching against hosts)

Usage: $0 -s <Status File> | -d <Nagios Log Directory>
[-w #[,#][,#]] [-c #[,#][,#]] [-u #[,#][,#]]
[--service=<RegEx> | --servhost=<RegEx> | --pattern=<RegEx> |
--host | --host=<RegEx>]
[--ack[=string]] [--dt[=string]] [--hostdown[=string]]
[-D #[,#]] [--ok] [-f <Log freshness in # minutes>]
$0 --help
$0 --version
NOTE: One of -s and -d must be specified

Options:
-s, --status=FILE_NAME
Location and name of status log (e.g. /usr/local/nagios/var/status.log)
-d, --dir=DIRECTORY_NAME
Directory that contains the nagios logs (e.g. /usr/local/nagios/var/)
-w, --warning=INTEGER[,INTEGER][,INTEGER]
#: Number of warnings to result in a WARNING state
OR
#,#: Warning,Criticals to result in a WARNING state
OR
#,#,#: Warning,Critical,Unknown to result in a WARNING state
Default: -w=1
-c, --critical=INTEGER[,INTEGER][,INTEGER]
#: Number of criticals to result in a CRITICAL state
OR
#,#: Warning,Criticals to result in a CRITICAL state
OR
#,#,#: Warning,Critical,Unknown to result in a CRITICAL state
Default: -c=1
-u, --unknown=INTEGER[,INTEGER][,INTEGER]
#: Number of unknowns to result in a UNKNOWN state
OR
#,#: Warning,Criticals to result in a UNKNOWN state
OR
#,#,#: Warning,Critical,Unknown to result in a UNKNOWN state
Default: -u=1
-S, --service[=REGEX]
Only match services [that match the RegEx]
(--service is default setting if no other matching arguments provided)
--servhost=REGEX
Only match services whose host match the RegEx
-p, --pattern=REGEX
Only parse for this regular expression (services only, not hosts)
--host[=REGEX]
Report on the state of hosts (whose name matches the RegEx if provided)
-a, --ack[=ok|warning|critical|unknown|down|unreachable]
Handle Acknowledged problems [--ack defaults to ok]
--dt, --downtime[=ok|warning|critical|unknown|down|unreachable]
Handle problems in scheduled downtime [--dt defaults to ok]
--hd, --hostdown[=ok|warning|critical|unknown|down|unreachable]
Handle services whose Host is down [--hd defaults to ok]
-D, --details=INTEGER[,INTEGER]
Amount of verbosity to output
If # problems:
<= 1st integer, return full details (each plugin's output)
<= 2nd integer, return some details (list each service host pair)
> 2nd integer, return the # of problems
-f, --freshness=INTEGER
Number of minutes old the log can be to make sure Nagios is running
(Default = 30 minutes)
--ok
Return an OK exit code, regardless of number of problems found
-h, --help
Print detailed help screen
-V, --version
Print version information

For service checking (use --service and/or --servhost):
1. The values of warning, critical, and unknown default to 1, i.e.
$0 will return CRITICAL if there is at least 1 critical service,
WARNING if there is at least 1 warning service, and UNKNOWN if there is
at least one unknown service.

2. If a service's host is DOWN or UNREACHABLE, $0 will use the
value of --hostdown to determine how to treat the service. Without that
argument, $0 will count the service as OK.

3. If a service's host is OK, but the last host-state change occurred more
recently than the last service check, $0 will ignore that service
(want to wait until the service has been checked after a host has recovered
or you may get service alert for services that still need to be checked)

4. If the --dt, --ack, or --hd tags are used, $0 will use the value
of the arguments to determine how to handle services in downtime, acknowledged,
or with down hosts (default=OK). For service checks, --dt will also check
if the service's host is in a downtime.

For host checking (use --host):
1. Using the --host argument, $0 will look for DOWN and UNREACHABLE
hosts. If any are found, $0 will return a CRITICAL. You can provide
an REGEX for --host to only check hosts with matching host names.

2. If the --dt or --ack tags are used, $0 will use the value of the
--dt/--ack arguments to determine the state of the host (default is OK)

EOF
}