diff options
author | Subhendu Ghosh <sghosh@users.sourceforge.net> | 2002-07-04 22:02:18 +0000 |
---|---|---|
committer | Subhendu Ghosh <sghosh@users.sourceforge.net> | 2002-07-04 22:02:18 +0000 |
commit | 78cc9b2fc14727d4f169874ca66e361ba7c102ee (patch) | |
tree | 28cbaefb65490f5a6a125709959b14cff2245893 /contrib | |
parent | 764e880f22d65ccca612fee8da1505cb9351eb16 (diff) | |
download | monitoring-plugins-78cc9b2fc14727d4f169874ca66e361ba7c102ee.tar.gz |
Jerome Tytgat - accumulated/percentage CPU/MEM per process - bash plugin
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@63 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/check_procl.sh | 400 |
1 files changed, 400 insertions, 0 deletions
diff --git a/contrib/check_procl.sh b/contrib/check_procl.sh new file mode 100644 index 00000000..b1793ad5 --- /dev/null +++ b/contrib/check_procl.sh | |||
@@ -0,0 +1,400 @@ | |||
1 | #!/bin/bash | ||
2 | |||
3 | # | ||
4 | # Check_procl.sh | ||
5 | # | ||
6 | # Program: Process load check plugin for Nagios | ||
7 | # License : GPL | ||
8 | # Copyright (c) 2002 Jerome Tytgat (j.tytgat@sioban.net) | ||
9 | # | ||
10 | # check_procl.sh,v 1.1 2002/07/04 09:35 | ||
11 | # | ||
12 | # Description : | ||
13 | # | ||
14 | # This plugin checks the %cpu, %mem or cputime of one or more processes | ||
15 | # | ||
16 | # Usage : | ||
17 | # | ||
18 | # check_procl.sh -p process1,process2,... -w a.b -c c.d --cpu | ||
19 | # check_procl.sh -p process1,process2,... -w a.b -c c.d --mem | ||
20 | # check_procl.sh -p process1,process2,... -w a:b:c -c d:e:f --cputime | ||
21 | # | ||
22 | # check_procl.sh -p %all% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime> | ||
23 | # check_procl.sh -p %max% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime> | ||
24 | # | ||
25 | # Example : | ||
26 | # | ||
27 | # To know the memory eaten by HTTPD processes, be warned when it reaches 50% and be critical when it reaches 75% | ||
28 | # check_procl.sh -p httpd -w 50.0 -c 75.0 --mem | ||
29 | # > OK - total %MEM for process httpd : 46.1 | ||
30 | # | ||
31 | # To know the process which eats the most cpu time, but as we are under linux and are using kapm we do : | ||
32 | # check_procl.sh -p %max% -e kapmd-idle,kapmd -w 0:1:0 -c 0:2:0 --cputime | ||
33 | # > CRITICAL - total CPUTIME for process named : 02:32:10 | ||
34 | # | ||
35 | # Tested on solaris 7/8, Linux Redhat 7.3 and Linux Suse 7.1 | ||
36 | # | ||
37 | # BUGS : problems with handling time on solaris... | ||
38 | |||
39 | |||
# Print the command-line synopsis of the plugin on stdout.
# $0 is expanded so that the synopsis shows the name the plugin was
# invoked under.
help_usage() {
  printf '%s\n' \
    "Usage:" \
    " $0 -p <process_name1,process_name2,... | %all% | %max%>" \
    " [-e <process_name1,process_name2,...>] -w warning -c critical < --cpu | --mem | --cputime>" \
    " $0 (-v | --version)" \
    " $0 (-h | --help)"
}
47 | |||
# Print the version banner, licence notice and credits on stdout.
help_version() {
  printf '%s\n' \
    "check_procl.sh (nagios-plugins) 1.1" \
    "The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute" \
    "copies of the plugins under the terms of the GNU General Public License." \
    "For more information about these matters, see the file named COPYING." \
    "Copyright (c) 2002 Jerome Tytgat - j.tytgat@sioban.net" \
    "Greetings goes to Websurg which kindly let me took time to develop this" \
    " Manu Feig and Jacques Kern who were my beta testers, thanks to them !"
}
57 | |||
# Make sure every command the plugin relies on can be found.
#
# Arguments: optional list of command names to look up; when none is given
#            the plugin's own requirement list is checked.
# Outputs:   for the first command that cannot be found, a two line
#            explanation on stdout.
# Returns:   0 when every command is available; otherwise the shell is
#            terminated with status 3.
#
# The previous implementation ran `type $i > /dev/null 2>&1 /dev/null`: the
# trailing /dev/null is not a redirection target but a second name handed
# to `type`, so the lookup also had to resolve "/dev/null" as a command.
# It also reacted only to an exit status of exactly 1.
verify_dep() {
  local dep

  if [ "$#" -eq 0 ]; then
    set -- bash cut egrep expr grep let ps sed sort tail test tr wc
  fi

  for dep in "$@"; do
    # command -v resolves builtins (let, test) as well as external tools.
    if ! command -v "$dep" > /dev/null 2>&1; then
      echo "I am missing an important component : $dep"
      echo "Cannot continue, sorry, try to find the missing one..."
      exit 3
    fi
  done
}
71 | |||
# Remember the name we were started under; it is used further down to filter
# the plugin's own entry out of the ps listing.
myself=$0

# Stop right away when a required command is missing.
verify_dep

# --help and --version are only recognised as the very first argument.
# Both leave with status 3.
case "$1" in
  -h|--help)
    help_version
    printf '%s\n' \
      "" \
      "This plugin will check either the cumulutative %cpu, %mem or cputime" \
      "of a process." \
      ""
    help_usage
    printf '%s\n' \
      "" \
      "Required Arguments:" \
      " -p, --process STRING1,STRING2,..." \
      " names of the processes we want to monitor," \
      " you can add as much as process as you want, separated by comma," \
      " hey will be cumulated" \
      " -p, --process %all%" \
      " The special keyword %all% will check the cumulative cpu/mem/time of all process" \
      " WARNING : Can be very slow on heavy loaded servers, watch your timeout !" \
      " -p, --process %max%" \
      " The special keyword %max% will check the process which eat the most" \
      " WARNING : only select the process which eat the more, not the cumulative," \
      " but return the cumulative" \
      " -w, --warning INTEGER.INTEGER or INTERGER:INTEGER:INTEGER" \
      " generate warning state if process count is outside this range" \
      " -c, --critical INTEGER.INTEGER or INTERGER:INTEGER:INTEGER" \
      " generate critical state if process count is outside this range" \
      " --cpu" \
      " return the current cpu usage for the given process" \
      " --mem" \
      " return the current memory usage for the given process" \
      " --cputime" \
      " return the total cputime usage for the given process" \
      "" \
      "Optional Argument:" \
      " -e, --exclude-process STRING1,STRING2,..." \
      " names of the processes we want don't want to monitor" \
      " only useful when associated with %all% or %max% keywords, else ignored" \
      " ex : kapm-idled on linux is a process which eat memory / cputime but not really... ;-)" \
      ""
    exit 3
    ;;
  -v|--version)
    help_version
    exit 3
    ;;
esac

# Count the words on the command line, with every '=' treated as a blank.
# A complete invocation needs at least seven of them
# (-p X -w Y -c Z plus one metric switch).
cli_words=$(echo $@ | tr "=" " " | wc -w)
if [ $cli_words -lt 7 ]; then
  echo "Bad arguments number (need at least 7)!"
  help_usage
  exit 3
fi
129 | |||
# Parser state. tt records which metric was requested:
# 0 = none yet, 1 = --cpu, 2 = --mem, 3 = --cputime.
tt=0
process_name=""
exclude_process_name=""
wt=""
ct=""

# Print a command-line error ($1) followed by the synopsis, then stop with
# status 3.
_cli_error() {
  echo "$1"
  help_usage
  exit 3
}

# Record the requested metric; asking for a second one is an error.
#   $1 = value stored in tt      $2 = label shown in the result line
#   $3 = ps output keyword       $4 = delimiter used inside the values
_choose_metric() {
  if [ $tt -ne 0 ]; then
    _cli_error "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
  fi
  tt=$1
  type_arg_aff=$2
  type_arg=$3
  delim=$4
}

# Test of the command lines arguments
while [ $# -gt 0 ]; do
  case "$1" in
    -p|--process)
      [ -z "$process_name" ] || _cli_error "Only one --process argument is useful..."
      shift
      # The comma separated list becomes a '|' separated alternation.
      process_name=$(echo $1 | tr "," "|")
      ;;
    -e|--exclude-process)
      [ -z "$exclude_process_name" ] || _cli_error "Only one --exclude-process argument is useful..."
      shift
      exclude_process_name=$(echo $1 | tr "," "|")
      ;;
    -w|--warning)
      [ -z "$wt" ] || _cli_error "Only one --warning argument needed... Trying to test bad things ? :-)"
      shift
      wt=$1
      ;;
    -c|--critical)
      [ -z "$ct" ] || _cli_error "Only one --critical argument needed... Trying to test bad things ? :-)"
      shift
      ct=$1
      ;;
    --cpu)
      _choose_metric 1 "%CPU" "pcpu" "."
      ;;
    --mem)
      _choose_metric 2 "%MEM" "pmem" "."
      ;;
    --cputime)
      _choose_metric 3 "TIME" "time" ":"
      ;;
    *)
      _cli_error "Unknown argument $1"
      ;;
  esac
  shift
done
228 | |||
# Is the process running ?
# The ps listing is searched for every value of process_name; for the
# %all% and %max% keywords an empty result is simply ignored.
# NOTE(review): the message says WARNING while the exit status is 3
# (presumably the Nagios UNKNOWN state) - confirm which one is intended.
ps_hits=$(ps -e | egrep "$process_name?")
if [ -z "$ps_hits" ]; then
  case "$process_name" in
    %all%|%max%)
      ;;
    *)
      echo "WARNING: process $process_name not running !"
      exit 3
      ;;
  esac
fi

# Cut of warning and critical values
# Fields are separated by $delim ("." for percentages, ":" for cputime).
wt_value1=$(echo $wt | cut -d"$delim" -f1)
wt_value2=$(echo $wt | cut -d"$delim" -f2)
ct_value1=$(echo $ct | cut -d"$delim" -f1)
ct_value2=$(echo $ct | cut -d"$delim" -f2)

# Only cputime thresholds carry a third field; percentages get a 0 so that
# the checks below can treat both kinds alike.
wt_value3=0
ct_value3=0
if [ $tt -eq 3 ]; then
  wt_value3=$(echo $wt | cut -d"$delim" -f3)
  ct_value3=$(echo $ct | cut -d"$delim" -f3)
fi

# Print a threshold error ($1) followed by the synopsis, then stop with
# status 3.
_threshold_error() {
  echo "$1"
  help_usage
  exit 3
}

# Succeeds when $1 still contains something after all digits are removed.
_not_all_digits() {
  [ "$(echo $1 | tr -d "[:digit:]")" != "" ]
}

# Integrity check of warning and critical values
if [ -z "$wt_value1" ] || [ -z "$wt_value2" ] || [ -z "$wt_value3" ]; then
  _threshold_error "Bad expression in the WARNING field : $wt"
fi

if _not_all_digits "$wt_value1" || _not_all_digits "$wt_value2" || _not_all_digits "$wt_value3"; then
  _threshold_error "Bad expression in the WARNING field : $wt"
fi

if [ -z "$ct_value1" ] || [ -z "$ct_value2" ] || [ -z "$ct_value3" ]; then
  _threshold_error "Bad expression in the CRITICAL field : $ct"
fi

if _not_all_digits "$ct_value1" || _not_all_digits "$ct_value2" || _not_all_digits "$ct_value3"; then
  _threshold_error "Bad expression in the CRITICAL field : $ct"
fi
280 | |||
# ps line construction set...
# Result: psline holds one value of the selected metric per matching
# process. For %max%, process_name is replaced by the name that was picked.

# Lines to drop from the listing: the plugin itself, the header line and,
# when given, the excluded processes.
if [ -z "$exclude_process_name" ]; then
  skip_regex="$myself|$type_arg_aff?"
else
  skip_regex="$myself|$type_arg_aff|$exclude_process_name?"
fi

# Removes the blanks in front of a leading digit so that the value becomes
# field 1 for cut.
trim_leading='s/^ *\([0-9]\)/\1/'

case "$process_name" in
  %all%)
    psline=$(ps -eo $type_arg,comm | egrep -v "$skip_regex" | sed "$trim_leading" | cut -d" " -f1)
    ;;
  %max%)
    # The last line after sorting names the process to report on ...
    pstmp=$(ps -eo $type_arg,comm | egrep -v "$skip_regex" | sort | tail -1 | sed "$trim_leading" | cut -d" " -f2)
    # ... and every process matching that name is collected.
    psline=$(ps -eo $type_arg,comm | grep $pstmp | sed "$trim_leading" | cut -d" " -f1)
    process_name=$pstmp
    ;;
  *)
    psline=$(ps -eo $type_arg,comm | egrep "$process_name?" | sed "$trim_leading" | cut -d" " -f1)
    ;;
esac
305 | |||
# Add up every sample held in $psline.
#
# Globals read:    psline - whitespace separated samples
#                  tt     - 3 selects cputime handling, anything else is
#                           treated as a percentage
#                  delim  - separator used inside a sample ("." or ":")
# Globals written: total1, total2, total3
#                  percentage : total1 = integer part, total2 = tenths,
#                               total3 = 0
#                  cputime    : total1 = hours, total2 = minutes,
#                               total3 = seconds
#
# Differences from the former inline loop:
#  - a sample without delimiter (for instance a percentage printed as "100")
#    no longer has its whole value added to the tenths as well; `cut -f2`
#    returns the complete field when the delimiter is absent;
#  - cputime samples written as DD-HH:MM:SS have their days converted to
#    hours where `expr` used to fail on them;
#  - fields are read as base 10, so "08" and "09" are accepted;
#  - samples that are not purely numeric are skipped.
accumulate_ps_values() {
  local sample days first second third

  total1=0
  total2=0
  total3=0

  # fetching the values
  for sample in $psline; do
    days=0
    if [ "$tt" = "3" ]; then
      # Special case for solaris - several format exist for the time
      # function... short samples (MM:SS) get an hour field.
      if [ "${#sample}" -le 6 ]; then
        sample="00:$sample"
      fi
      # Split off a leading "DD-" day count.
      case "$sample" in
        *-*)
          days=${sample%%-*}
          sample=${sample#*-}
          ;;
      esac
    fi

    IFS="$delim" read -r first second third <<< "$sample"
    first=${first:-0}
    second=${second:-0}
    third=${third:-0}

    # Anything that is not made of digits only cannot be added up.
    case "$days$first$second$third" in
      *[!0-9]*) continue ;;
    esac

    # 10# forces base 10; a bare leading zero would be read as octal.
    total1=$(( total1 + 10#$days * 24 + 10#$first ))
    total2=$(( total2 + 10#$second ))
    total3=$(( total3 + 10#$third ))

    if [ "$tt" = "3" ]; then
      # Carry seconds into minutes, then minutes into hours.
      total2=$(( total2 + total3 / 60 ))
      total3=$(( total3 % 60 ))
      total1=$(( total1 + total2 / 60 ))
      total2=$(( total2 % 60 ))
    else
      # Carry tenths into the integer part.
      total1=$(( total1 + total2 / 10 ))
      total2=$(( total2 % 10 ))
    fi
  done
}

accumulate_ps_values
346 | |||
warn=0
crit=0

# evaluation of the cumulative values vs warning and critical values
# A threshold is reached when the total is greater than or equal to it,
# compared field by field from the most significant one.
case "$tt" in
  1|2)
    # %CPU and %MEM: integer part and tenths.
    return_total="$total1.$total2"
    if [ $total1 -gt $ct_value1 ]; then crit=1; fi
    if [ $total1 -eq $ct_value1 -a $total2 -ge $ct_value2 ]; then crit=1; fi
    if [ $total1 -gt $wt_value1 ]; then warn=1; fi
    if [ $total1 -eq $wt_value1 -a $total2 -ge $wt_value2 ]; then warn=1; fi
    ;;
  3)
    # cputime: hours, minutes, seconds; single digit fields are shown with
    # a leading zero.
    hh=$total1
    mm=$total2
    ss=$total3
    if [ ${#hh} -eq 1 ]; then hh="0$hh"; fi
    if [ ${#mm} -eq 1 ]; then mm="0$mm"; fi
    if [ ${#ss} -eq 1 ]; then ss="0$ss"; fi
    return_total="$hh:$mm:$ss"
    if [ $total1 -gt $ct_value1 ]; then crit=1; fi
    if [ $total1 -eq $ct_value1 -a $total2 -gt $ct_value2 ]; then crit=1; fi
    if [ $total1 -eq $ct_value1 -a $total2 -eq $ct_value2 -a $total3 -ge $ct_value3 ]; then crit=1; fi
    if [ $total1 -gt $wt_value1 ]; then warn=1; fi
    if [ $total1 -eq $wt_value1 -a $total2 -gt $wt_value2 ]; then warn=1; fi
    if [ $total1 -eq $wt_value1 -a $total2 -eq $wt_value2 -a $total3 -ge $wt_value3 ]; then warn=1; fi
    ;;
esac

# last check ...
# Critical reached without warning means the thresholds are the wrong way
# round.
if [ $crit -eq 1 ] && [ $warn -eq 0 ]; then
  echo "Critical value must be greater than warning value !"
  help_usage
  exit 3
fi

# Finally Inform Nagios of what we found...
# The '|' separated name list is shown comma separated again.
shown_name=$(echo $process_name | tr "|" ",")
case "$crit$warn" in
  1*)
    echo "CRITICAL - total $type_arg_aff for process $shown_name : $return_total"
    exit 2
    ;;
  01)
    echo "WARNING - total $type_arg_aff for process $shown_name : $return_total"
    exit 1
    ;;
  *)
    echo "OK - total $type_arg_aff for process $shown_name : $return_total"
    exit 0
    ;;
esac

# Hey what are we doing here ???
exit 3