diff options
Diffstat (limited to 'plugins/check_overcr.c')
-rw-r--r-- | plugins/check_overcr.c | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/plugins/check_overcr.c b/plugins/check_overcr.c new file mode 100644 index 00000000..305a8242 --- /dev/null +++ b/plugins/check_overcr.c | |||
@@ -0,0 +1,489 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * CHECK_OVERCR.C | ||
4 | * | ||
5 | * Program: Over-CR collector plugin for Nagios | ||
6 | * License: GPL | ||
7 | * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org) | ||
8 | * | ||
9 | * $Id$ | ||
10 | * | ||
11 | * Description: | ||
12 | * | ||
13 | * Notes: | ||
14 | * - This plugin requires that Eric Molitor's Over-CR collector daemon | ||
15 | * be running on any UNIX boxes you want to monitor. Over-CR | ||
16 | * is available from http://www.molitor.org/overcr/ | ||
17 | * | ||
18 | * Modifications: | ||
19 | * | ||
20 | * 08-11-1999 Jacob Lundqvist <jaclu@grm.se> | ||
21 | * Load was presented as a one digit percentage - changed to two digit | ||
22 | * value. Before, a load of 11.2 was presented as "1.2%" (not very | ||
23 | * high). Warning and Critical params were ints, not very good | ||
24 | * for load, changed to doubles, so we can trap load limits like | ||
25 | * 1.5. Also added more informative LOAD error messages. | ||
26 | * | ||
27 | * License Information: | ||
28 | * | ||
29 | * This program is free software; you can redistribute it and/or modify | ||
30 | * it under the terms of the GNU General Public License as published by | ||
31 | * the Free Software Foundation; either version 2 of the License, or | ||
32 | * (at your option) any later version. | ||
33 | * | ||
34 | * This program is distributed in the hope that it will be useful, | ||
35 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
36 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
37 | * GNU General Public License for more details. | ||
38 | * | ||
39 | * You should have received a copy of the GNU General Public License | ||
40 | * along with this program; if not, write to the Free Software | ||
41 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
42 | * | ||
43 | *****************************************************************************/ | ||
44 | |||
45 | #include "config.h" | ||
46 | #include "common.h" | ||
47 | #include "netutils.h" | ||
48 | #include "utils.h" | ||
49 | |||
/*
 * Identifiers for the variable selected with -v; process_arguments() stores
 * exactly one of them in vars_to_check.  The values are distinct powers of
 * two, although main() only ever compares vars_to_check for equality.
 */
#define CHECK_NONE 0
#define CHECK_LOAD1 1
#define CHECK_LOAD5 2
#define CHECK_LOAD15 4
#define CHECK_DPU 8
#define CHECK_PROCS 16
#define CHECK_NETSTAT 32
#define CHECK_UPTIME 64

/* Default TCP port used to reach the collector (overridden with -p) */
#define PORT 2000

/* Program name printed in usage, help and error messages */
#define PROGNAME "check_overcr"

/* Host to query: set by -H, or by a leading non-option argument */
char *server_address = NULL;
/* TCP port to query: PORT unless -p is given */
int server_port = PORT;
/* Thresholds given with -w / -c; only meaningful when the matching
 * check_*_value flag below is TRUE */
double warning_value = 0L;
double critical_value = 0L;
/* Set to TRUE once -w / -c has been seen on the command line */
int check_warning_value = FALSE;
int check_critical_value = FALSE;
/* Which variable to check: one of the CHECK_* constants above */
int vars_to_check = CHECK_NONE;
/* NOTE(review): not referenced anywhere else in the visible part of this file */
int cmd_timeout = 1;

/* Port number parsed from "-v NET<port>" */
int netstat_port = 0;
/* Filesystem name copied from "-v DPU<filesys>" */
char *disk_name = NULL;
/* Process name copied from "-v PROC<process>" */
char *process_name = NULL;

int process_arguments (int, char **);
void print_usage (void);
void print_help (void);
79 | |||
80 | int | ||
81 | main (int argc, char **argv) | ||
82 | { | ||
83 | int result; | ||
84 | char send_buffer[MAX_INPUT_BUFFER]; | ||
85 | char recv_buffer[MAX_INPUT_BUFFER]; | ||
86 | char output_message[MAX_INPUT_BUFFER]; | ||
87 | char temp_buffer[MAX_INPUT_BUFFER]; | ||
88 | char *temp_ptr = NULL; | ||
89 | int found_disk = FALSE; | ||
90 | unsigned long percent_used_disk_space = 100; | ||
91 | double load; | ||
92 | double load_1min; | ||
93 | double load_5min; | ||
94 | double load_15min; | ||
95 | int port_connections = 0; | ||
96 | int processes = 0; | ||
97 | double uptime_raw_hours; | ||
98 | int uptime_raw_minutes = 0; | ||
99 | int uptime_days = 0; | ||
100 | int uptime_hours = 0; | ||
101 | int uptime_minutes = 0; | ||
102 | |||
103 | if (process_arguments (argc, argv) == ERROR) | ||
104 | usage ("Could not parse arguments\n"); | ||
105 | |||
106 | /* initialize alarm signal handling */ | ||
107 | signal (SIGALRM, socket_timeout_alarm_handler); | ||
108 | |||
109 | /* set socket timeout */ | ||
110 | alarm (socket_timeout); | ||
111 | |||
112 | result = STATE_OK; | ||
113 | |||
114 | if (vars_to_check == CHECK_LOAD1 || vars_to_check == CHECK_LOAD5 | ||
115 | || vars_to_check == CHECK_LOAD15) { | ||
116 | |||
117 | strcpy (send_buffer, "LOAD\r\nQUIT\r\n"); | ||
118 | result = | ||
119 | process_tcp_request2 (server_address, server_port, send_buffer, | ||
120 | recv_buffer, sizeof (recv_buffer)); | ||
121 | if (result != STATE_OK) | ||
122 | return result; | ||
123 | |||
124 | temp_ptr = (char *) strtok (recv_buffer, "\r\n"); | ||
125 | if (temp_ptr == NULL) { | ||
126 | printf ("Invalid response from server - no load information\n"); | ||
127 | return STATE_CRITICAL; | ||
128 | } | ||
129 | load_1min = strtod (temp_ptr, NULL); | ||
130 | temp_ptr = (char *) strtok (NULL, "\r\n"); | ||
131 | if (temp_ptr == NULL) { | ||
132 | printf ("Invalid response from server after load 1\n"); | ||
133 | return STATE_CRITICAL; | ||
134 | } | ||
135 | load_5min = strtod (temp_ptr, NULL); | ||
136 | temp_ptr = (char *) strtok (NULL, "\r\n"); | ||
137 | if (temp_ptr == NULL) { | ||
138 | printf ("Invalid response from server after load 5\n"); | ||
139 | return STATE_CRITICAL; | ||
140 | } | ||
141 | load_15min = strtod (temp_ptr, NULL); | ||
142 | |||
143 | |||
144 | switch (vars_to_check) { | ||
145 | case CHECK_LOAD1: | ||
146 | strcpy (temp_buffer, "1"); | ||
147 | load = load_1min; | ||
148 | break; | ||
149 | case CHECK_LOAD5: | ||
150 | strcpy (temp_buffer, "5"); | ||
151 | load = load_5min; | ||
152 | break; | ||
153 | default: | ||
154 | strcpy (temp_buffer, "15"); | ||
155 | load = load_15min; | ||
156 | break; | ||
157 | } | ||
158 | |||
159 | if (check_critical_value == TRUE && (load >= critical_value)) | ||
160 | result = STATE_CRITICAL; | ||
161 | else if (check_warning_value == TRUE && (load >= warning_value)) | ||
162 | result = STATE_WARNING; | ||
163 | sprintf (output_message, "Load %s - %s-min load average = %0.2f", | ||
164 | (result == STATE_OK) ? "ok" : "problem", temp_buffer, load); | ||
165 | } | ||
166 | |||
167 | |||
168 | else if (vars_to_check == CHECK_DPU) { | ||
169 | |||
170 | sprintf (send_buffer, "DISKSPACE\r\n"); | ||
171 | result = | ||
172 | process_tcp_request2 (server_address, server_port, send_buffer, | ||
173 | recv_buffer, sizeof (recv_buffer)); | ||
174 | if (result != STATE_OK) | ||
175 | return result; | ||
176 | |||
177 | for (temp_ptr = (char *) strtok (recv_buffer, " "); temp_ptr != NULL; | ||
178 | temp_ptr = (char *) strtok (NULL, " ")) { | ||
179 | |||
180 | if (!strcmp (temp_ptr, disk_name)) { | ||
181 | found_disk = TRUE; | ||
182 | temp_ptr = (char *) strtok (NULL, "%"); | ||
183 | if (temp_ptr == NULL) { | ||
184 | printf ("Invalid response from server\n"); | ||
185 | return STATE_CRITICAL; | ||
186 | } | ||
187 | percent_used_disk_space = strtoul (temp_ptr, NULL, 10); | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | temp_ptr = (char *) strtok (NULL, "\r\n"); | ||
192 | } | ||
193 | |||
194 | /* error if we couldn't find the info for the disk */ | ||
195 | if (found_disk == FALSE) { | ||
196 | sprintf (output_message, "Error: Disk '%s' non-existent or not mounted", | ||
197 | disk_name); | ||
198 | result = STATE_CRITICAL; | ||
199 | } | ||
200 | |||
201 | /* else check the disk space used */ | ||
202 | else { | ||
203 | |||
204 | if (check_critical_value == TRUE | ||
205 | && (percent_used_disk_space >= critical_value)) result = | ||
206 | STATE_CRITICAL; | ||
207 | else if (check_warning_value == TRUE | ||
208 | && (percent_used_disk_space >= warning_value)) result = | ||
209 | STATE_WARNING; | ||
210 | |||
211 | sprintf (output_message, "Disk %s - %lu%% used on %s", | ||
212 | (result == STATE_OK) ? "ok" : "problem", | ||
213 | percent_used_disk_space, disk_name); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | else if (vars_to_check == CHECK_NETSTAT) { | ||
218 | |||
219 | sprintf (send_buffer, "NETSTAT %d\r\n", netstat_port); | ||
220 | result = | ||
221 | process_tcp_request2 (server_address, server_port, send_buffer, | ||
222 | recv_buffer, sizeof (recv_buffer)); | ||
223 | if (result != STATE_OK) | ||
224 | return result; | ||
225 | |||
226 | port_connections = strtod (recv_buffer, NULL); | ||
227 | |||
228 | if (check_critical_value == TRUE && (port_connections >= critical_value)) | ||
229 | result = STATE_CRITICAL; | ||
230 | else if (check_warning_value == TRUE | ||
231 | && (port_connections >= warning_value)) result = STATE_WARNING; | ||
232 | |||
233 | sprintf (output_message, "Net %s - %d connection%s on port %d", | ||
234 | (result == STATE_OK) ? "ok" : "problem", port_connections, | ||
235 | (port_connections == 1) ? "" : "s", netstat_port); | ||
236 | } | ||
237 | |||
238 | else if (vars_to_check == CHECK_PROCS) { | ||
239 | |||
240 | sprintf (send_buffer, "PROCESS %s\r\n", process_name); | ||
241 | result = | ||
242 | process_tcp_request2 (server_address, server_port, send_buffer, | ||
243 | recv_buffer, sizeof (recv_buffer)); | ||
244 | if (result != STATE_OK) | ||
245 | return result; | ||
246 | |||
247 | temp_ptr = (char *) strtok (recv_buffer, "("); | ||
248 | if (temp_ptr == NULL) { | ||
249 | printf ("Invalid response from server\n"); | ||
250 | return STATE_CRITICAL; | ||
251 | } | ||
252 | temp_ptr = (char *) strtok (NULL, ")"); | ||
253 | if (temp_ptr == NULL) { | ||
254 | printf ("Invalid response from server\n"); | ||
255 | return STATE_CRITICAL; | ||
256 | } | ||
257 | processes = strtod (temp_ptr, NULL); | ||
258 | |||
259 | if (check_critical_value == TRUE && (processes >= critical_value)) | ||
260 | result = STATE_CRITICAL; | ||
261 | else if (check_warning_value == TRUE && (processes >= warning_value)) | ||
262 | result = STATE_WARNING; | ||
263 | |||
264 | sprintf (output_message, "Process %s - %d instance%s of %s running", | ||
265 | (result == STATE_OK) ? "ok" : "problem", processes, | ||
266 | (processes == 1) ? "" : "s", process_name); | ||
267 | } | ||
268 | |||
269 | else if (vars_to_check == CHECK_UPTIME) { | ||
270 | |||
271 | sprintf (send_buffer, "UPTIME\r\n"); | ||
272 | result = | ||
273 | process_tcp_request2 (server_address, server_port, send_buffer, | ||
274 | recv_buffer, sizeof (recv_buffer)); | ||
275 | if (result != STATE_OK) | ||
276 | return result; | ||
277 | |||
278 | uptime_raw_hours = strtod (recv_buffer, NULL); | ||
279 | uptime_raw_minutes = (unsigned long) (uptime_raw_hours * 60.0); | ||
280 | |||
281 | if (check_critical_value == TRUE | ||
282 | && (uptime_raw_minutes <= critical_value)) result = STATE_CRITICAL; | ||
283 | else if (check_warning_value == TRUE | ||
284 | && (uptime_raw_minutes <= warning_value)) result = STATE_WARNING; | ||
285 | |||
286 | uptime_days = uptime_raw_minutes / 1440; | ||
287 | uptime_raw_minutes %= 1440; | ||
288 | uptime_hours = uptime_raw_minutes / 60; | ||
289 | uptime_raw_minutes %= 60; | ||
290 | uptime_minutes = uptime_raw_minutes; | ||
291 | |||
292 | sprintf (output_message, "Uptime %s - Up %d days %d hours %d minutes", | ||
293 | (result == STATE_OK) ? "ok" : "problem", uptime_days, | ||
294 | uptime_hours, uptime_minutes); | ||
295 | } | ||
296 | |||
297 | else { | ||
298 | strcpy (output_message, "Nothing to check!\n"); | ||
299 | result = STATE_UNKNOWN; | ||
300 | } | ||
301 | |||
302 | /* reset timeout */ | ||
303 | alarm (0); | ||
304 | |||
305 | printf ("%s\n", output_message); | ||
306 | |||
307 | return result; | ||
308 | } | ||
309 | |||
310 | |||
311 | |||
312 | |||
313 | |||
314 | /* process command-line arguments */ | ||
315 | int | ||
316 | process_arguments (int argc, char **argv) | ||
317 | { | ||
318 | int c; | ||
319 | |||
320 | #ifdef HAVE_GETOPT_H | ||
321 | int option_index = 0; | ||
322 | static struct option long_options[] = { | ||
323 | {"port", required_argument, 0, 'p'}, | ||
324 | {"timeout", required_argument, 0, 't'}, | ||
325 | {"critical", required_argument, 0, 'c'}, | ||
326 | {"warning", required_argument, 0, 'w'}, | ||
327 | {"variable", required_argument, 0, 'v'}, | ||
328 | {"hostname", required_argument, 0, 'H'}, | ||
329 | {"version", no_argument, 0, 'V'}, | ||
330 | {"help", no_argument, 0, 'h'}, | ||
331 | {0, 0, 0, 0} | ||
332 | }; | ||
333 | #endif | ||
334 | |||
335 | /* no options were supplied */ | ||
336 | if (argc < 2) | ||
337 | return ERROR; | ||
338 | |||
339 | /* backwards compatibility */ | ||
340 | if (!is_option (argv[1])) { | ||
341 | server_address = argv[1]; | ||
342 | argv[1] = argv[0]; | ||
343 | argv = &argv[1]; | ||
344 | argc--; | ||
345 | } | ||
346 | |||
347 | for (c = 1; c < argc; c++) { | ||
348 | if (strcmp ("-to", argv[c]) == 0) | ||
349 | strcpy (argv[c], "-t"); | ||
350 | else if (strcmp ("-wv", argv[c]) == 0) | ||
351 | strcpy (argv[c], "-w"); | ||
352 | else if (strcmp ("-cv", argv[c]) == 0) | ||
353 | strcpy (argv[c], "-c"); | ||
354 | } | ||
355 | |||
356 | while (1) { | ||
357 | #ifdef HAVE_GETOPT_H | ||
358 | c = | ||
359 | getopt_long (argc, argv, "+hVH:t:c:w:p:v:", long_options, | ||
360 | &option_index); | ||
361 | #else | ||
362 | c = getopt (argc, argv, "+hVH:t:c:w:p:v:"); | ||
363 | #endif | ||
364 | |||
365 | if (c == -1 || c == EOF || c == 1) | ||
366 | break; | ||
367 | |||
368 | switch (c) { | ||
369 | case '?': /* print short usage statement if args not parsable */ | ||
370 | printf ("%s: Unknown argument: %s\n\n", my_basename (argv[0]), optarg); | ||
371 | print_usage (); | ||
372 | exit (STATE_UNKNOWN); | ||
373 | case 'h': /* help */ | ||
374 | print_help (); | ||
375 | exit (STATE_OK); | ||
376 | case 'V': /* version */ | ||
377 | print_revision (my_basename (argv[0]), "$Revision$"); | ||
378 | exit (STATE_OK); | ||
379 | case 'H': /* hostname */ | ||
380 | server_address = optarg; | ||
381 | break; | ||
382 | case 'p': /* port */ | ||
383 | if (is_intnonneg (optarg)) | ||
384 | server_port = atoi (optarg); | ||
385 | else | ||
386 | terminate (STATE_UNKNOWN, | ||
387 | "Server port an integer (seconds)\nType '%s -h' for additional help\n", | ||
388 | PROGNAME); | ||
389 | break; | ||
390 | case 'v': /* variable */ | ||
391 | if (strcmp (optarg, "LOAD1") == 0) | ||
392 | vars_to_check = CHECK_LOAD1; | ||
393 | else if (strcmp (optarg, "LOAD5") == 0) | ||
394 | vars_to_check = CHECK_LOAD5; | ||
395 | else if (strcmp (optarg, "LOAD15") == 0) | ||
396 | vars_to_check = CHECK_LOAD15; | ||
397 | else if (strcmp (optarg, "UPTIME") == 0) | ||
398 | vars_to_check = CHECK_UPTIME; | ||
399 | else if (strstr (optarg, "PROC") == optarg) { | ||
400 | vars_to_check = CHECK_PROCS; | ||
401 | process_name = strscpy (process_name, optarg + 4); | ||
402 | } | ||
403 | else if (strstr (optarg, "NET") == optarg) { | ||
404 | vars_to_check = CHECK_NETSTAT; | ||
405 | netstat_port = atoi (optarg + 3); | ||
406 | } | ||
407 | else if (strstr (optarg, "DPU") == optarg) { | ||
408 | vars_to_check = CHECK_DPU; | ||
409 | disk_name = strscpy (disk_name, optarg + 3); | ||
410 | } | ||
411 | else | ||
412 | return ERROR; | ||
413 | break; | ||
414 | case 'w': /* warning threshold */ | ||
415 | warning_value = strtoul (optarg, NULL, 10); | ||
416 | check_warning_value = TRUE; | ||
417 | break; | ||
418 | case 'c': /* critical threshold */ | ||
419 | critical_value = strtoul (optarg, NULL, 10); | ||
420 | check_critical_value = TRUE; | ||
421 | break; | ||
422 | case 't': /* timeout */ | ||
423 | socket_timeout = atoi (optarg); | ||
424 | if (socket_timeout <= 0) | ||
425 | return ERROR; | ||
426 | } | ||
427 | |||
428 | } | ||
429 | return OK; | ||
430 | } | ||
431 | |||
432 | |||
433 | |||
434 | |||
435 | |||
436 | void | ||
437 | print_usage (void) | ||
438 | { | ||
439 | printf | ||
440 | ("Usage: %s -H host [-p port] [-v variable] [-w warning] [-c critical] [-t timeout]\n", | ||
441 | PROGNAME); | ||
442 | } | ||
443 | |||
444 | |||
445 | |||
446 | |||
447 | |||
448 | void | ||
449 | print_help (void) | ||
450 | { | ||
451 | print_revision (PROGNAME, "$Revision$"); | ||
452 | printf | ||
453 | ("Copyright (c) 2000 Ethan Galstad/Karl DeBisschop\n\n" | ||
454 | "This plugin attempts to contact the Over-CR collector daemon running on the\n" | ||
455 | "remote UNIX server in order to gather the requested system information. This\n" | ||
456 | "plugin requres that Eric Molitors' Over-CR collector daemon be running on the\n" | ||
457 | "remote server. Over-CR can be downloaded from http://www.molitor.org/overcr\n" | ||
458 | "(This plugin was tested with version 0.99.53 of the Over-CR collector)\n\n"); | ||
459 | print_usage (); | ||
460 | printf | ||
461 | ("\nOptions:\n" | ||
462 | "-H, --hostname=HOST\n" | ||
463 | " Name of the host to check\n" | ||
464 | "-p, --port=INTEGER\n" | ||
465 | " Optional port number (default: %d)\n" | ||
466 | "-v, --variable=STRING\n" | ||
467 | " Variable to check. Valid variables include:\n" | ||
468 | " LOAD1 = 1 minute average CPU load\n" | ||
469 | " LOAD5 = 5 minute average CPU load\n" | ||
470 | " LOAD15 = 15 minute average CPU load\n" | ||
471 | " DPU<filesys> = percent used disk space on filesystem <filesys>\n" | ||
472 | " PROC<process> = number of running processes with name <process>\n" | ||
473 | " NET<port> = number of active connections on TCP port <port>\n" | ||
474 | " UPTIME = system uptime in seconds\n" | ||
475 | " -w, --warning=INTEGER\n" | ||
476 | " Threshold which will result in a warning status\n" | ||
477 | " -c, --critical=INTEGER\n" | ||
478 | " Threshold which will result in a critical status\n" | ||
479 | " -t, --timeout=INTEGER\n" | ||
480 | " Seconds before connection attempt times out (default: %d)\n" | ||
481 | "-h, --help\n" | ||
482 | " Print this help screen\n" | ||
483 | "-V, --version\n" | ||
484 | " Print version information\n\n" | ||
485 | "Notes:\n" | ||
486 | " - For the available options, the critical threshold value should always be\n" | ||
487 | " higher than the warning threshold value, EXCEPT with the uptime variable\n" | ||
488 | " (i.e. lower uptimes are worse).\n", PORT, DEFAULT_SOCKET_TIMEOUT); | ||
489 | } | ||