summaryrefslogtreecommitdiffstats
path: root/plugins/check_overcr.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/check_overcr.c')
-rw-r--r--plugins/check_overcr.c489
1 files changed, 489 insertions, 0 deletions
diff --git a/plugins/check_overcr.c b/plugins/check_overcr.c
new file mode 100644
index 00000000..305a8242
--- /dev/null
+++ b/plugins/check_overcr.c
@@ -0,0 +1,489 @@
1/******************************************************************************
2*
3* CHECK_OVERCR.C
4*
5* Program: Over-CR collector plugin for Nagios
6* License: GPL
7* Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
8*
9* $Id$
10*
11* Description:
12*
13* Notes:
14* - This plugin requires that Eric Molitor's Over-CR collector daemon
15* be running on any UNIX boxes you want to monitor. Over-CR
16* is available from http://www.molitor.org/overcr/
17*
18* Modifications:
19*
20* 08-11-1999 Jacob Lundqvist <jaclu@grm.se>
21* Load was presented as a one digit percentage - changed to a two digit
22* value; before, a load of 11.2 was presented as "1.2%" (not very
23* high). Warning and Critical params were ints, not very good
24* for load, changed to doubles, so we can trap load limits like
25* 1.5. Also added more informative LOAD error messages.
26*
27* License Information:
28*
29* This program is free software; you can redistribute it and/or modify
30* it under the terms of the GNU General Public License as published by
31* the Free Software Foundation; either version 2 of the License, or
32* (at your option) any later version.
33*
34* This program is distributed in the hope that it will be useful,
35* but WITHOUT ANY WARRANTY; without even the implied warranty of
36* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37* GNU General Public License for more details.
38*
39* You should have received a copy of the GNU General Public License
40* along with this program; if not, write to the Free Software
41* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
42*
43*****************************************************************************/
44
45#include "config.h"
46#include "common.h"
47#include "netutils.h"
48#include "utils.h"
49
50#define CHECK_NONE 0
51#define CHECK_LOAD1 1
52#define CHECK_LOAD5 2
53#define CHECK_LOAD15 4
54#define CHECK_DPU 8
55#define CHECK_PROCS 16
56#define CHECK_NETSTAT 32
57#define CHECK_UPTIME 64
58
59#define PORT 2000
60
61#define PROGNAME "check_overcr"
62
63char *server_address = NULL;
64int server_port = PORT;
65double warning_value = 0L;
66double critical_value = 0L;
67int check_warning_value = FALSE;
68int check_critical_value = FALSE;
69int vars_to_check = CHECK_NONE;
70int cmd_timeout = 1;
71
72int netstat_port = 0;
73char *disk_name = NULL;
74char *process_name = NULL;
75
76int process_arguments (int, char **);
77void print_usage (void);
78void print_help (void);
79
80int
81main (int argc, char **argv)
82{
83 int result;
84 char send_buffer[MAX_INPUT_BUFFER];
85 char recv_buffer[MAX_INPUT_BUFFER];
86 char output_message[MAX_INPUT_BUFFER];
87 char temp_buffer[MAX_INPUT_BUFFER];
88 char *temp_ptr = NULL;
89 int found_disk = FALSE;
90 unsigned long percent_used_disk_space = 100;
91 double load;
92 double load_1min;
93 double load_5min;
94 double load_15min;
95 int port_connections = 0;
96 int processes = 0;
97 double uptime_raw_hours;
98 int uptime_raw_minutes = 0;
99 int uptime_days = 0;
100 int uptime_hours = 0;
101 int uptime_minutes = 0;
102
103 if (process_arguments (argc, argv) == ERROR)
104 usage ("Could not parse arguments\n");
105
106 /* initialize alarm signal handling */
107 signal (SIGALRM, socket_timeout_alarm_handler);
108
109 /* set socket timeout */
110 alarm (socket_timeout);
111
112 result = STATE_OK;
113
114 if (vars_to_check == CHECK_LOAD1 || vars_to_check == CHECK_LOAD5
115 || vars_to_check == CHECK_LOAD15) {
116
117 strcpy (send_buffer, "LOAD\r\nQUIT\r\n");
118 result =
119 process_tcp_request2 (server_address, server_port, send_buffer,
120 recv_buffer, sizeof (recv_buffer));
121 if (result != STATE_OK)
122 return result;
123
124 temp_ptr = (char *) strtok (recv_buffer, "\r\n");
125 if (temp_ptr == NULL) {
126 printf ("Invalid response from server - no load information\n");
127 return STATE_CRITICAL;
128 }
129 load_1min = strtod (temp_ptr, NULL);
130 temp_ptr = (char *) strtok (NULL, "\r\n");
131 if (temp_ptr == NULL) {
132 printf ("Invalid response from server after load 1\n");
133 return STATE_CRITICAL;
134 }
135 load_5min = strtod (temp_ptr, NULL);
136 temp_ptr = (char *) strtok (NULL, "\r\n");
137 if (temp_ptr == NULL) {
138 printf ("Invalid response from server after load 5\n");
139 return STATE_CRITICAL;
140 }
141 load_15min = strtod (temp_ptr, NULL);
142
143
144 switch (vars_to_check) {
145 case CHECK_LOAD1:
146 strcpy (temp_buffer, "1");
147 load = load_1min;
148 break;
149 case CHECK_LOAD5:
150 strcpy (temp_buffer, "5");
151 load = load_5min;
152 break;
153 default:
154 strcpy (temp_buffer, "15");
155 load = load_15min;
156 break;
157 }
158
159 if (check_critical_value == TRUE && (load >= critical_value))
160 result = STATE_CRITICAL;
161 else if (check_warning_value == TRUE && (load >= warning_value))
162 result = STATE_WARNING;
163 sprintf (output_message, "Load %s - %s-min load average = %0.2f",
164 (result == STATE_OK) ? "ok" : "problem", temp_buffer, load);
165 }
166
167
168 else if (vars_to_check == CHECK_DPU) {
169
170 sprintf (send_buffer, "DISKSPACE\r\n");
171 result =
172 process_tcp_request2 (server_address, server_port, send_buffer,
173 recv_buffer, sizeof (recv_buffer));
174 if (result != STATE_OK)
175 return result;
176
177 for (temp_ptr = (char *) strtok (recv_buffer, " "); temp_ptr != NULL;
178 temp_ptr = (char *) strtok (NULL, " ")) {
179
180 if (!strcmp (temp_ptr, disk_name)) {
181 found_disk = TRUE;
182 temp_ptr = (char *) strtok (NULL, "%");
183 if (temp_ptr == NULL) {
184 printf ("Invalid response from server\n");
185 return STATE_CRITICAL;
186 }
187 percent_used_disk_space = strtoul (temp_ptr, NULL, 10);
188 break;
189 }
190
191 temp_ptr = (char *) strtok (NULL, "\r\n");
192 }
193
194 /* error if we couldn't find the info for the disk */
195 if (found_disk == FALSE) {
196 sprintf (output_message, "Error: Disk '%s' non-existent or not mounted",
197 disk_name);
198 result = STATE_CRITICAL;
199 }
200
201 /* else check the disk space used */
202 else {
203
204 if (check_critical_value == TRUE
205 && (percent_used_disk_space >= critical_value)) result =
206 STATE_CRITICAL;
207 else if (check_warning_value == TRUE
208 && (percent_used_disk_space >= warning_value)) result =
209 STATE_WARNING;
210
211 sprintf (output_message, "Disk %s - %lu%% used on %s",
212 (result == STATE_OK) ? "ok" : "problem",
213 percent_used_disk_space, disk_name);
214 }
215 }
216
217 else if (vars_to_check == CHECK_NETSTAT) {
218
219 sprintf (send_buffer, "NETSTAT %d\r\n", netstat_port);
220 result =
221 process_tcp_request2 (server_address, server_port, send_buffer,
222 recv_buffer, sizeof (recv_buffer));
223 if (result != STATE_OK)
224 return result;
225
226 port_connections = strtod (recv_buffer, NULL);
227
228 if (check_critical_value == TRUE && (port_connections >= critical_value))
229 result = STATE_CRITICAL;
230 else if (check_warning_value == TRUE
231 && (port_connections >= warning_value)) result = STATE_WARNING;
232
233 sprintf (output_message, "Net %s - %d connection%s on port %d",
234 (result == STATE_OK) ? "ok" : "problem", port_connections,
235 (port_connections == 1) ? "" : "s", netstat_port);
236 }
237
238 else if (vars_to_check == CHECK_PROCS) {
239
240 sprintf (send_buffer, "PROCESS %s\r\n", process_name);
241 result =
242 process_tcp_request2 (server_address, server_port, send_buffer,
243 recv_buffer, sizeof (recv_buffer));
244 if (result != STATE_OK)
245 return result;
246
247 temp_ptr = (char *) strtok (recv_buffer, "(");
248 if (temp_ptr == NULL) {
249 printf ("Invalid response from server\n");
250 return STATE_CRITICAL;
251 }
252 temp_ptr = (char *) strtok (NULL, ")");
253 if (temp_ptr == NULL) {
254 printf ("Invalid response from server\n");
255 return STATE_CRITICAL;
256 }
257 processes = strtod (temp_ptr, NULL);
258
259 if (check_critical_value == TRUE && (processes >= critical_value))
260 result = STATE_CRITICAL;
261 else if (check_warning_value == TRUE && (processes >= warning_value))
262 result = STATE_WARNING;
263
264 sprintf (output_message, "Process %s - %d instance%s of %s running",
265 (result == STATE_OK) ? "ok" : "problem", processes,
266 (processes == 1) ? "" : "s", process_name);
267 }
268
269 else if (vars_to_check == CHECK_UPTIME) {
270
271 sprintf (send_buffer, "UPTIME\r\n");
272 result =
273 process_tcp_request2 (server_address, server_port, send_buffer,
274 recv_buffer, sizeof (recv_buffer));
275 if (result != STATE_OK)
276 return result;
277
278 uptime_raw_hours = strtod (recv_buffer, NULL);
279 uptime_raw_minutes = (unsigned long) (uptime_raw_hours * 60.0);
280
281 if (check_critical_value == TRUE
282 && (uptime_raw_minutes <= critical_value)) result = STATE_CRITICAL;
283 else if (check_warning_value == TRUE
284 && (uptime_raw_minutes <= warning_value)) result = STATE_WARNING;
285
286 uptime_days = uptime_raw_minutes / 1440;
287 uptime_raw_minutes %= 1440;
288 uptime_hours = uptime_raw_minutes / 60;
289 uptime_raw_minutes %= 60;
290 uptime_minutes = uptime_raw_minutes;
291
292 sprintf (output_message, "Uptime %s - Up %d days %d hours %d minutes",
293 (result == STATE_OK) ? "ok" : "problem", uptime_days,
294 uptime_hours, uptime_minutes);
295 }
296
297 else {
298 strcpy (output_message, "Nothing to check!\n");
299 result = STATE_UNKNOWN;
300 }
301
302 /* reset timeout */
303 alarm (0);
304
305 printf ("%s\n", output_message);
306
307 return result;
308}
309
310
311
312
313
314/* process command-line arguments */
315int
316process_arguments (int argc, char **argv)
317{
318 int c;
319
320#ifdef HAVE_GETOPT_H
321 int option_index = 0;
322 static struct option long_options[] = {
323 {"port", required_argument, 0, 'p'},
324 {"timeout", required_argument, 0, 't'},
325 {"critical", required_argument, 0, 'c'},
326 {"warning", required_argument, 0, 'w'},
327 {"variable", required_argument, 0, 'v'},
328 {"hostname", required_argument, 0, 'H'},
329 {"version", no_argument, 0, 'V'},
330 {"help", no_argument, 0, 'h'},
331 {0, 0, 0, 0}
332 };
333#endif
334
335 /* no options were supplied */
336 if (argc < 2)
337 return ERROR;
338
339 /* backwards compatibility */
340 if (!is_option (argv[1])) {
341 server_address = argv[1];
342 argv[1] = argv[0];
343 argv = &argv[1];
344 argc--;
345 }
346
347 for (c = 1; c < argc; c++) {
348 if (strcmp ("-to", argv[c]) == 0)
349 strcpy (argv[c], "-t");
350 else if (strcmp ("-wv", argv[c]) == 0)
351 strcpy (argv[c], "-w");
352 else if (strcmp ("-cv", argv[c]) == 0)
353 strcpy (argv[c], "-c");
354 }
355
356 while (1) {
357#ifdef HAVE_GETOPT_H
358 c =
359 getopt_long (argc, argv, "+hVH:t:c:w:p:v:", long_options,
360 &option_index);
361#else
362 c = getopt (argc, argv, "+hVH:t:c:w:p:v:");
363#endif
364
365 if (c == -1 || c == EOF || c == 1)
366 break;
367
368 switch (c) {
369 case '?': /* print short usage statement if args not parsable */
370 printf ("%s: Unknown argument: %s\n\n", my_basename (argv[0]), optarg);
371 print_usage ();
372 exit (STATE_UNKNOWN);
373 case 'h': /* help */
374 print_help ();
375 exit (STATE_OK);
376 case 'V': /* version */
377 print_revision (my_basename (argv[0]), "$Revision$");
378 exit (STATE_OK);
379 case 'H': /* hostname */
380 server_address = optarg;
381 break;
382 case 'p': /* port */
383 if (is_intnonneg (optarg))
384 server_port = atoi (optarg);
385 else
386 terminate (STATE_UNKNOWN,
387 "Server port an integer (seconds)\nType '%s -h' for additional help\n",
388 PROGNAME);
389 break;
390 case 'v': /* variable */
391 if (strcmp (optarg, "LOAD1") == 0)
392 vars_to_check = CHECK_LOAD1;
393 else if (strcmp (optarg, "LOAD5") == 0)
394 vars_to_check = CHECK_LOAD5;
395 else if (strcmp (optarg, "LOAD15") == 0)
396 vars_to_check = CHECK_LOAD15;
397 else if (strcmp (optarg, "UPTIME") == 0)
398 vars_to_check = CHECK_UPTIME;
399 else if (strstr (optarg, "PROC") == optarg) {
400 vars_to_check = CHECK_PROCS;
401 process_name = strscpy (process_name, optarg + 4);
402 }
403 else if (strstr (optarg, "NET") == optarg) {
404 vars_to_check = CHECK_NETSTAT;
405 netstat_port = atoi (optarg + 3);
406 }
407 else if (strstr (optarg, "DPU") == optarg) {
408 vars_to_check = CHECK_DPU;
409 disk_name = strscpy (disk_name, optarg + 3);
410 }
411 else
412 return ERROR;
413 break;
414 case 'w': /* warning threshold */
415 warning_value = strtoul (optarg, NULL, 10);
416 check_warning_value = TRUE;
417 break;
418 case 'c': /* critical threshold */
419 critical_value = strtoul (optarg, NULL, 10);
420 check_critical_value = TRUE;
421 break;
422 case 't': /* timeout */
423 socket_timeout = atoi (optarg);
424 if (socket_timeout <= 0)
425 return ERROR;
426 }
427
428 }
429 return OK;
430}
431
432
433
434
435
436void
437print_usage (void)
438{
439 printf
440 ("Usage: %s -H host [-p port] [-v variable] [-w warning] [-c critical] [-t timeout]\n",
441 PROGNAME);
442}
443
444
445
446
447
448void
449print_help (void)
450{
451 print_revision (PROGNAME, "$Revision$");
452 printf
453 ("Copyright (c) 2000 Ethan Galstad/Karl DeBisschop\n\n"
454 "This plugin attempts to contact the Over-CR collector daemon running on the\n"
455 "remote UNIX server in order to gather the requested system information. This\n"
456 "plugin requres that Eric Molitors' Over-CR collector daemon be running on the\n"
457 "remote server. Over-CR can be downloaded from http://www.molitor.org/overcr\n"
458 "(This plugin was tested with version 0.99.53 of the Over-CR collector)\n\n");
459 print_usage ();
460 printf
461 ("\nOptions:\n"
462 "-H, --hostname=HOST\n"
463 " Name of the host to check\n"
464 "-p, --port=INTEGER\n"
465 " Optional port number (default: %d)\n"
466 "-v, --variable=STRING\n"
467 " Variable to check. Valid variables include:\n"
468 " LOAD1 = 1 minute average CPU load\n"
469 " LOAD5 = 5 minute average CPU load\n"
470 " LOAD15 = 15 minute average CPU load\n"
471 " DPU<filesys> = percent used disk space on filesystem <filesys>\n"
472 " PROC<process> = number of running processes with name <process>\n"
473 " NET<port> = number of active connections on TCP port <port>\n"
474 " UPTIME = system uptime in seconds\n"
475 " -w, --warning=INTEGER\n"
476 " Threshold which will result in a warning status\n"
477 " -c, --critical=INTEGER\n"
478 " Threshold which will result in a critical status\n"
479 " -t, --timeout=INTEGER\n"
480 " Seconds before connection attempt times out (default: %d)\n"
481 "-h, --help\n"
482 " Print this help screen\n"
483 "-V, --version\n"
484 " Print version information\n\n"
485 "Notes:\n"
486 " - For the available options, the critical threshold value should always be\n"
487 " higher than the warning threshold value, EXCEPT with the uptime variable\n"
488 " (i.e. lower uptimes are worse).\n", PORT, DEFAULT_SOCKET_TIMEOUT);
489}