summary | refs | log | tree | commit | diff | stats
path: root/plugins
diff options
context:
space:
mode:
author: Ton Voon <tonvoon@users.sourceforge.net>, 2008-03-19 14:42:12 +0000
committer: Ton Voon <tonvoon@users.sourceforge.net>, 2008-03-19 14:42:12 +0000
commit: b17b2421987bb8a7606948333e75f990b35852b8 (patch)
tree: 5d220f355728f1448f9dbd6f796631bbfef0733c /plugins
parent: cab9440a671390e279228cd31ad56b055d611a21 (diff)
download: monitoring-plugins-b17b2421987bb8a7606948333e75f990b35852b8.tar.gz
1st pass at check_procs with multiple threshold checks
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/branches/new_threshold_syntax@1958 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/check_cluster.c 2
-rw-r--r-- plugins/check_procs.c 238
2 files changed, 171 insertions, 69 deletions
diff --git a/plugins/check_cluster.c b/plugins/check_cluster.c
index c67573a3..32316410 100644
--- a/plugins/check_cluster.c
+++ b/plugins/check_cluster.c
@@ -80,7 +80,7 @@ int main(int argc, char **argv){
80 /* Initialize the thresholds */ 80 /* Initialize the thresholds */
81 set_thresholds(&thresholds, warn_threshold, crit_threshold); 81 set_thresholds(&thresholds, warn_threshold, crit_threshold);
82 if(verbose) 82 if(verbose)
83 print_thresholds("check_cluster", thresholds); 83 print_thresholds(thresholds);
84 84
85 /* check the data values */ 85 /* check the data values */
86 for(ptr=strtok(data_vals,",");ptr!=NULL;ptr=strtok(NULL,",")){ 86 for(ptr=strtok(data_vals,",");ptr!=NULL;ptr=strtok(NULL,",")){
diff --git a/plugins/check_procs.c b/plugins/check_procs.c
index d56d4571..7f3ca21d 100644
--- a/plugins/check_procs.c
+++ b/plugins/check_procs.c
@@ -44,6 +44,7 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net";
44#include "popen.h" 44#include "popen.h"
45#include "utils.h" 45#include "utils.h"
46#include "regex.h" 46#include "regex.h"
47#include "utils_base.h"
47 48
48#include <pwd.h> 49#include <pwd.h>
49 50
@@ -53,6 +54,7 @@ int check_thresholds (int);
53int convert_to_seconds (char *); 54int convert_to_seconds (char *);
54void print_help (void); 55void print_help (void);
55void print_usage (void); 56void print_usage (void);
57void actions_on_failed_state (int, char*); /* Helper routine */
56 58
57int wmax = -1; 59int wmax = -1;
58int cmax = -1; 60int cmax = -1;
@@ -74,13 +76,16 @@ int options = 0; /* bitmask of filter criteria to test against */
74/* Different metrics */ 76/* Different metrics */
75char *metric_name; 77char *metric_name;
76enum metric { 78enum metric {
79 NONE,
80 DEFAULT,
77 METRIC_PROCS, 81 METRIC_PROCS,
78 METRIC_VSZ, 82 METRIC_VSZ,
79 METRIC_RSS, 83 METRIC_RSS,
80 METRIC_CPU, 84 METRIC_CPU,
81 METRIC_ELAPSED 85 METRIC_ELAPSED
82}; 86};
83enum metric metric = METRIC_PROCS; 87enum metric metric = DEFAULT;
88enum metric default_metric = METRIC_PROCS;
84 89
85int verbose = 0; 90int verbose = 0;
86int uid; 91int uid;
@@ -99,6 +104,14 @@ char tmp[MAX_INPUT_BUFFER];
99 104
100FILE *ps_input = NULL; 105FILE *ps_input = NULL;
101 106
107thresholds *number_threshold = NULL;
108thresholds *vsz_threshold = NULL;
109thresholds *rss_threshold = NULL;
110thresholds *cpu_threshold = NULL;
111
112int warn = 0; /* number of processes in warn state */
113int crit = 0; /* number of processes in crit state */
114int result = STATE_UNKNOWN;
102 115
103int 116int
104main (int argc, char **argv) 117main (int argc, char **argv)
@@ -127,10 +140,14 @@ main (int argc, char **argv)
127 int pos; /* number of spaces before 'args' in `ps` output */ 140 int pos; /* number of spaces before 'args' in `ps` output */
128 int cols; /* number of columns in ps output */ 141 int cols; /* number of columns in ps output */
129 int expected_cols = PS_COLS - 1; 142 int expected_cols = PS_COLS - 1;
130 int warn = 0; /* number of processes in warn state */ 143 int i = 0; /* Temporary values */
131 int crit = 0; /* number of processes in crit state */ 144 double rss_sum = 0;
132 int i = 0; 145 double vsz_sum = 0;
133 int result = STATE_UNKNOWN; 146 double cpu_sum = 0;
147 double vsz_max = 0;
148 double rss_max = 0;
149 double cpu_max = 0;
150
134 151
135 setlocale (LC_ALL, ""); 152 setlocale (LC_ALL, "");
136 bindtextdomain (PACKAGE, LOCALEDIR); 153 bindtextdomain (PACKAGE, LOCALEDIR);
@@ -141,7 +158,6 @@ main (int argc, char **argv)
141 procprog = malloc (MAX_INPUT_BUFFER); 158 procprog = malloc (MAX_INPUT_BUFFER);
142 159
143 asprintf (&metric_name, "PROCS"); 160 asprintf (&metric_name, "PROCS");
144 metric = METRIC_PROCS;
145 161
146 if (process_arguments (argc, argv) == ERROR) 162 if (process_arguments (argc, argv) == ERROR)
147 usage4 (_("Could not parse arguments")); 163 usage4 (_("Could not parse arguments"));
@@ -218,6 +234,7 @@ main (int argc, char **argv)
218 /* Ignore self */ 234 /* Ignore self */
219 if (mypid == procpid) continue; 235 if (mypid == procpid) continue;
220 236
237 /* Filter */
221 if ((options & STAT) && (strstr (statopts, procstat))) 238 if ((options & STAT) && (strstr (statopts, procstat)))
222 resultsum |= STAT; 239 resultsum |= STAT;
223 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL)) 240 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
@@ -244,35 +261,64 @@ main (int argc, char **argv)
244 continue; 261 continue;
245 262
246 procs++; 263 procs++;
247 if (verbose >= 2) { 264 if (verbose >= 3) {
248 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n", 265 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
249 procuid, procvsz, procrss, 266 procuid, procvsz, procrss,
250 procpid, procppid, procpcpu, procstat, 267 procpid, procppid, procpcpu, procstat,
251 procetime, procprog, procargs); 268 procetime, procprog, procargs);
252 } 269 }
253 270
254 if (metric == METRIC_VSZ) 271 /* Check against metric - old style single check */
255 i = check_thresholds (procvsz); 272 if (metric == METRIC_VSZ) {
256 else if (metric == METRIC_RSS) 273 actions_on_failed_state( check_thresholds (procvsz), procprog );
257 i = check_thresholds (procrss); 274 } else if (metric == METRIC_RSS) {
275 actions_on_failed_state( check_thresholds (procrss), procprog );
258 /* TODO? float thresholds for --metric=CPU */ 276 /* TODO? float thresholds for --metric=CPU */
259 else if (metric == METRIC_CPU) 277 } else if (metric == METRIC_CPU) {
260 i = check_thresholds ((int)procpcpu); 278 actions_on_failed_state( check_thresholds ((int)procpcpu), procprog );
261 else if (metric == METRIC_ELAPSED) 279 } else if (metric == METRIC_ELAPSED) {
262 i = check_thresholds (procseconds); 280 actions_on_failed_state( check_thresholds (procseconds), procprog );
263 281 }
264 if (metric != METRIC_PROCS) { 282
265 if (i == STATE_WARNING) { 283 /* Check against all new style thresholds */
266 warn++; 284 if (vsz_threshold != NULL) {
267 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog); 285 if ((i = get_status( procvsz, vsz_threshold )) != STATE_OK ) {
268 result = max_state (result, i); 286 actions_on_failed_state(i, procprog);
287 if (verbose >= 2) {
288 printf("VSZ state %d: proc=%s vsz=%d ", i, procprog, procvsz);
289 print_thresholds( vsz_threshold );
290 }
291 }
292 }
293 if (rss_threshold != NULL) {
294 if ((i = get_status( procrss, rss_threshold )) != STATE_OK ) {
295 actions_on_failed_state(i, procprog);
296 if (verbose >= 2) {
297 printf("RSS: proc=%s rss=%d ", procprog, procrss);
298 print_thresholds( rss_threshold );
299 }
269 } 300 }
270 if (i == STATE_CRITICAL) { 301 }
271 crit++; 302 if (cpu_threshold != NULL) {
272 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog); 303 if (( i = get_status( procpcpu, cpu_threshold )) != STATE_OK ) {
273 result = max_state (result, i); 304 actions_on_failed_state(i, procprog);
305 if (verbose >= 2) {
306 printf("CPU: proc=%s cpu=%f ", procprog, procpcpu);
307 print_thresholds( cpu_threshold );
308 }
274 } 309 }
275 } 310 }
311
312 /* Summary information */
313 rss_sum += procrss;
314 vsz_sum += procvsz;
315 cpu_sum += procpcpu;
316 if (procrss > rss_max)
317 rss_max = procrss;
318 if (procvsz > vsz_max)
319 vsz_max = procvsz;
320 if (procpcpu > cpu_max)
321 cpu_max = procpcpu;
276 } 322 }
277 /* This should not happen */ 323 /* This should not happen */
278 else if (verbose) { 324 else if (verbose) {
@@ -308,7 +354,12 @@ main (int argc, char **argv)
308 354
309 /* Needed if procs found, but none match filter */ 355 /* Needed if procs found, but none match filter */
310 if ( metric == METRIC_PROCS ) { 356 if ( metric == METRIC_PROCS ) {
311 result = max_state (result, check_thresholds (procs) ); 357 result = max_state (result, i = check_thresholds (procs) );
358 }
359
360 if (number_threshold != NULL) {
361 i = get_status( procs, number_threshold );
362 actions_on_failed_state(i, "NUMBER_OF_PROCESSES");
312 } 363 }
313 364
314 if ( result == STATE_OK ) { 365 if ( result == STATE_OK ) {
@@ -316,12 +367,12 @@ main (int argc, char **argv)
316 } else if (result == STATE_WARNING) { 367 } else if (result == STATE_WARNING) {
317 printf ("%s %s: ", metric_name, _("WARNING")); 368 printf ("%s %s: ", metric_name, _("WARNING"));
318 if ( metric != METRIC_PROCS ) { 369 if ( metric != METRIC_PROCS ) {
319 printf (_("%d warn out of "), warn); 370 printf (_("Alerts: %d warn from "), warn);
320 } 371 }
321 } else if (result == STATE_CRITICAL) { 372 } else if (result == STATE_CRITICAL) {
322 printf ("%s %s: ", metric_name, _("CRITICAL")); 373 printf ("%s %s: ", metric_name, _("CRITICAL"));
323 if (metric != METRIC_PROCS) { 374 if (metric != METRIC_PROCS) {
324 printf (_("%d crit, %d warn out of "), crit, warn); 375 printf (_("Alerts: %d crit, %d warn from "), crit, warn);
325 } 376 }
326 } 377 }
327 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs); 378 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
@@ -333,6 +384,17 @@ main (int argc, char **argv)
333 if ( verbose >= 1 && strcmp(fails,"") ) 384 if ( verbose >= 1 && strcmp(fails,"") )
334 printf (" [%s]", fails); 385 printf (" [%s]", fails);
335 386
387 printf(" | ");
388 if( number_threshold != NULL)
389 printf("number=%d ", procs);
390 if (procs > 0) {
391 if( vsz_threshold != NULL)
392 printf("vsz=%.0f ", vsz_sum/procs);
393 if( rss_threshold != NULL)
394 printf("rss=%.0f ", rss_sum/procs);
395 if( cpu_threshold != NULL)
396 printf("cpu=%.2f ", cpu_sum/procs);
397 }
336 printf ("\n"); 398 printf ("\n");
337 return result; 399 return result;
338} 400}
@@ -368,6 +430,22 @@ process_arguments (int argc, char **argv)
368 {"verbose", no_argument, 0, 'v'}, 430 {"verbose", no_argument, 0, 'v'},
369 {"ereg-argument-array", required_argument, 0, CHAR_MAX+1}, 431 {"ereg-argument-array", required_argument, 0, CHAR_MAX+1},
370 {"input-file", required_argument, 0, CHAR_MAX+2}, 432 {"input-file", required_argument, 0, CHAR_MAX+2},
433 {"number", optional_argument, 0, CHAR_MAX+3},
434 {"rss-threshold", optional_argument, 0, CHAR_MAX+4},
435 /*
436 {"rss-max", optional_argument, 0, CHAR_MAX+5},
437 {"rss-sum", optional_argument, 0, CHAR_MAX+6},
438 */
439 {"vsz-threshold", optional_argument, 0, CHAR_MAX+7},
440 /*
441 {"vsz-max", optional_argument, 0, CHAR_MAX+8},
442 {"vsz-sum", optional_argument, 0, CHAR_MAX+9},
443 */
444 {"cpu-threshold", optional_argument, 0, CHAR_MAX+10},
445 /*
446 {"cpu-max", optional_argument, 0, CHAR_MAX+11},
447 {"cpu-sum", optional_argument, 0, CHAR_MAX+12},
448 */
371 {0, 0, 0, 0} 449 {0, 0, 0, 0}
372 }; 450 };
373 451
@@ -537,6 +615,26 @@ process_arguments (int argc, char **argv)
537 case CHAR_MAX+2: 615 case CHAR_MAX+2:
538 input_filename = optarg; 616 input_filename = optarg;
539 break; 617 break;
618 case CHAR_MAX+3:
619 number_threshold = parse_thresholds_string(optarg);
620 if (metric == DEFAULT)
621 default_metric=NONE;
622 break;
623 case CHAR_MAX+4:
624 rss_threshold = parse_thresholds_string(optarg);
625 if (metric == DEFAULT)
626 default_metric=NONE;
627 break;
628 case CHAR_MAX+7:
629 vsz_threshold = parse_thresholds_string(optarg);
630 if (metric == DEFAULT)
631 default_metric=NONE;
632 break;
633 case CHAR_MAX+10:
634 cpu_threshold = parse_thresholds_string(optarg);
635 if (metric == DEFAULT)
636 default_metric=NONE;
637 break;
540 } 638 }
541 } 639 }
542 640
@@ -598,6 +696,9 @@ validate_arguments ()
598 if (fails==NULL) 696 if (fails==NULL)
599 fails = strdup(""); 697 fails = strdup("");
600 698
699 if (metric==DEFAULT)
700 metric = default_metric;
701
601 return options; 702 return options;
602} 703}
603 704
@@ -636,6 +737,21 @@ check_thresholds (int value)
636} 737}
637 738
638 739
740void
741actions_on_failed_state(int state, char *procprog) {
742 result = max_state (result, state);
743 if (state != STATE_WARNING && state != STATE_CRITICAL)
744 return;
745 if (state == STATE_WARNING) {
746 warn++;
747 }
748 if (state == STATE_CRITICAL) {
749 crit++;
750 }
751 /* TODO: This should be a hash, to remove duplicates */
752 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
753}
754
639/* convert the elapsed time to seconds */ 755/* convert the elapsed time to seconds */
640int 756int
641convert_to_seconds(char *etime) { 757convert_to_seconds(char *etime) {
@@ -707,31 +823,21 @@ print_help (void)
707 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n"); 823 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
708 printf (COPYRIGHT, copyright, email); 824 printf (COPYRIGHT, copyright, email);
709 825
710 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified")); 826 print_usage ();
711 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
712 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
713 827
714 printf ("\n\n"); 828 printf ("\n\n");
715
716 print_usage ();
717 829
718 printf ("%s\n", _("Required Arguments:")); 830 printf("Checks all processes and, optionally, filters to a subset to check thresholds values against.\n");
719 printf (" %s\n", "-w, --warning=RANGE"); 831 printf("Can specify any of the following thresholds:\n");
720 printf (" %s\n", _("Generate warning state if metric is outside this range")); 832
721 printf (" %s\n", "-c, --critical=RANGE"); 833 printf(" --number=THRESHOLD - Compares the number of matching processes\n");
722 printf (" %s\n", _("Generate critical state if metric is outside this range")); 834 printf(" --vsz-threshold=THRESHOLD - Compares each process' vsz (in kilobytes)\n");
835 printf(" --rss-threshold=THRESHOLD - Compares each process' rss (in kilobytes)\n");
836 printf(" --cpu-threshold=THRESHOLD - Compares each process' cpu (in %%)\n");
837 /* TODO: Add support for etime */
838 printf("\n\n");
723 839
724 printf ("%s\n", _("Optional Arguments:")); 840 printf ("%s\n", _("Optional Arguments:"));
725 printf (" %s\n", "-m, --metric=TYPE");
726 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
727 printf (" %s\n", _("PROCS - number of processes (default)"));
728 printf (" %s\n", _("VSZ - virtual memory size"));
729 printf (" %s\n", _("RSS - resident set memory size"));
730 printf (" %s\n", _("CPU - percentage cpu"));
731/* only linux etime is support currently */
732#if defined( __linux__ )
733 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
734#endif /* defined(__linux__) */
735 printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT); 841 printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT);
736 842
737 printf (" %s\n", "-v, --verbose"); 843 printf (" %s\n", "-v, --verbose");
@@ -759,29 +865,25 @@ print_help (void)
759 printf (" %s\n", "-C, --command=COMMAND"); 865 printf (" %s\n", "-C, --command=COMMAND");
760 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path).")); 866 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
761 867
762 printf(_("\n\ 868 printf("\n");
763RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
764specified 'max:min', a warning status will be generated if the\n\
765count is inside the specified range\n\n"));
766 869
767 printf(_("\ 870 printf("\
768This plugin checks the number of currently running processes and\n\ 871THRESHOLDS are specified as 'critical_range/warning_range' where\n\
769generates WARNING or CRITICAL states if the process count is outside\n\ 872RANGES are defined as 'min:max'. max can be removed if it is infinity.\n\
770the specified threshold ranges. The process count can be filtered by\n\ 873Alerts will occur inside this range, unless you specify '^' before\n\
771process owner, parent process PID, current state (e.g., 'Z'), or may\n\ 874the range, to mean alert outside this range\n\n");
772be the total number of running processes\n\n"));
773 875
774 printf ("%s\n", _("Examples:")); 876 printf ("%s\n", _("Examples:"));
775 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry"); 877 printf (" %s\n", "check_procs --number=:2/5: -C portsentry");
776 printf (" %s\n", _("Warning if not two processes with command name portsentry.")); 878 printf (" %s\n", _("Warning if greater than five processes with command name portsentry."));
777 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes")); 879 printf (" %s\n\n", _("Critical if <= 2 processes"));
778 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root"); 880 printf (" %s\n", "check_procs --vsz-threshold=100:/50:");
779 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing")); 881 printf (" %s\n\n", _("Warning if vsz of any processes is over 50K or critical if vsz is over 100K"));
780 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root")); 882 printf (" %s\n", "check_procs --cpu-threshold=20:/10: --ereg-argument-array='java.*server'");
781 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ"); 883 printf (" %s\n\n", _("For all processes with arguments matching the regular expression, warning if cpu is over 10% or critical if over 20%"));
782 printf (" %s\n\n", _("Alert if vsz of any processes over 50K or 100K")); 884 printf (" %s\n", "check_procs --rss-threshold=100: --number=/:10 --cpu-threshold=30:/10: -a '/usr/local/bin/perl' -u root");
783 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU"); 885 printf (" %s\n", _("Critical if rss >= 100K, or warning if total number of process <= 10, or critical if cpu >= 30% or warning if cpu >= 10%."));
784 printf (" %s\n\n", _("Alert if cpu of any processes over 10%% or 20%%")); 886 printf (" %s\n", _("Filter by arguments containing '/usr/local/bin/perl' and owned by root"));
785 887
786 printf (_(UT_SUPPORT)); 888 printf (_(UT_SUPPORT));
787} 889}