summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSubhendu Ghosh <sghosh@users.sourceforge.net>2002-07-04 22:02:18 (GMT)
committerSubhendu Ghosh <sghosh@users.sourceforge.net>2002-07-04 22:02:18 (GMT)
commit78cc9b2fc14727d4f169874ca66e361ba7c102ee (patch)
tree28cbaefb65490f5a6a125709959b14cff2245893
parent764e880f22d65ccca612fee8da1505cb9351eb16 (diff)
downloadmonitoring-plugins-78cc9b2fc14727d4f169874ca66e361ba7c102ee.tar.gz
Jerome Tytgat - accumulated/percentage CPU/MEM per process - bash plugin
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@63 f882894a-f735-0410-b71e-b25c423dba1c
-rw-r--r--contrib/check_procl.sh400
1 files changed, 400 insertions, 0 deletions
diff --git a/contrib/check_procl.sh b/contrib/check_procl.sh
new file mode 100644
index 0000000..b1793ad
--- /dev/null
+++ b/contrib/check_procl.sh
@@ -0,0 +1,400 @@
1#!/bin/bash
2
3#
4# Check_procl.sh
5#
6# Program: Process load check plugin for Nagios
7# License : GPL
8# Copyright (c) 2002 Jerome Tytgat (j.tytgat@sioban.net)
9#
10# check_procl.sh,v 1.1 2002/07/04 09:35
11#
12# Description :
13#
14# This plugin checks the cumulative %cpu, %mem or cputime of one or more processes
15#
16# Usage :
17#
18# check_procl.sh -p process1,process2,... -w a.b -c c.d --cpu
19# check_procl.sh -p process1,process2,... -w a.b -c c.d --mem
20# check_procl.sh -p process1,process2,... -w a:b:c -c d:e:f --cputime
21#
22# check_procl.sh -p %all% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime>
23# check_procl.sh -p %max% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime>
24#
25# Example :
26#
27# To monitor the memory used by the httpd processes, with a warning when it reaches 50% and a critical state when it reaches 75% :
28# check_procl.sh -p httpd -w 50.0 -c 75.0 --mem
29# > OK - total %MEM for process httpd : 46.1
30#
31# To find the process which uses the most cpu time, excluding the kapm idle processes found on Linux :
32# check_procl.sh -p %max% -e kapmd-idle,kapmd -w 0:1:0 -c 0:2:0 --cputime
33# > CRITICAL - total CPUTIME for process named : 02:32:10
34#
35# Tested on solaris 7/8, Linux Redhat 7.3 and Linux Suse 7.1
36#
37# BUGS : problems with handling time on solaris...
38
39
# Print the command line synopsis of the plugin on stdout.
# Reads $0 to show the name the plugin was invoked with.
help_usage() {
  printf '%s\n' \
    "Usage:" \
    " $0 -p <process_name1,process_name2,... | %all% | %max%>" \
    " [-e <process_name1,process_name2,...>] -w warning -c critical < --cpu | --mem | --cputime>" \
    " $0 (-v | --version)" \
    " $0 (-h | --help)"
}
47
# Print the version banner, licence notice and credits on stdout.
help_version() {
  printf '%s\n' \
    'check_procl.sh (nagios-plugins) 1.1' \
    'The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute' \
    'copies of the plugins under the terms of the GNU General Public License.' \
    'For more information about these matters, see the file named COPYING.' \
    'Copyright (c) 2002 Jerome Tytgat - j.tytgat@sioban.net' \
    'Greetings goes to Websurg which kindly let me took time to develop this' \
    ' Manu Feig and Jacques Kern who were my beta testers, thanks to them !'
}
57
# Check that every tool the plugin relies on can be found by the shell.
#
# Arguments: optional list of command names to look up; when none is given
#            the plugin's own list of required tools is checked.
# Outputs:   on the first missing tool, an explanatory message on stdout.
# Exits:     with status 3 as soon as one tool is missing; returns normally
#            when all of them are found.
verify_dep() {
  local -a required
  local tool

  if [ "$#" -gt 0 ]; then
    required=("$@")
  else
    required=(bash cut egrep expr grep let ps sed sort tail test tr wc)
  fi

  for tool in "${required[@]}"; do
    # Only the tool name is handed to "type"; both output streams are
    # discarded because only the exit status matters here.
    if ! type "$tool" > /dev/null 2>&1; then
      echo "I am missing an important component : $tool"
      echo "Cannot continue, sorry, try to find the missing one..."
      exit 3
    fi
  done
}
71
# Remember how the plugin was invoked; the value is used further down when
# the plugin's own entry is filtered out of the ps listing.
myself=$0

# Stop right away (exit 3) when a required external tool is missing.
verify_dep

# -h / --help : print the banner, the detailed help and leave.
# The exit status is 3 (UNKNOWN in the Nagios plugin convention) because no
# check has been performed.
case "$1" in
  -h|--help)
    help_version
    echo ""
    echo "This plugin will check either the cumulutative %cpu, %mem or cputime"
    echo "of a process."
    echo ""
    help_usage
    echo ""
    echo "Required Arguments:"
    echo " -p, --process STRING1,STRING2,..."
    echo " names of the processes we want to monitor,"
    echo " you can add as much as process as you want, separated by comma,"
    echo " hey will be cumulated"
    echo " -p, --process %all%"
    echo " The special keyword %all% will check the cumulative cpu/mem/time of all process"
    echo " WARNING : Can be very slow on heavy loaded servers, watch your timeout !"
    echo " -p, --process %max%"
    echo " The special keyword %max% will check the process which eat the most"
    echo " WARNING : only select the process which eat the more, not the cumulative,"
    echo " but return the cumulative"
    echo " -w, --warning INTEGER.INTEGER or INTERGER:INTEGER:INTEGER"
    # The threshold texts describe what the evaluation really does: the state
    # is raised once the cumulated value reaches the given threshold.
    echo " generate warning state if the cumulated value reaches or exceeds this threshold"
    echo " -c, --critical INTEGER.INTEGER or INTERGER:INTEGER:INTEGER"
    echo " generate critical state if the cumulated value reaches or exceeds this threshold"
    echo " --cpu"
    echo " return the current cpu usage for the given process"
    echo " --mem"
    echo " return the current memory usage for the given process"
    echo " --cputime"
    echo " return the total cputime usage for the given process"
    echo ""
    echo "Optional Argument:"
    echo " -e, --exclude-process STRING1,STRING2,..."
    echo " names of the processes we want don't want to monitor"
    echo " only useful when associated with %all% or %max% keywords, else ignored"
    echo " ex : kapm-idled on linux is a process which eat memory / cputime but not really... ;-)"
    echo ""
    exit 3
    ;;
esac
116
# -v / --version : print the banner and leave (exit 3, no check performed).
case "$1" in
  -v|--version)
    help_version
    exit 3
    ;;
esac

# A usable command line needs at least 7 words (-p X -w X -c X plus the check
# type). "=" is counted as a separator so that "--option=value" is worth two
# words. printf with quoted "$@" is used instead of echo so that arguments
# such as -e or -n are counted as well and nothing is glob-expanded.
arg_words=$(printf '%s\n' "$@" | tr "=" " " | wc -w)
if (( arg_words < 7 )); then
  echo "Bad arguments number (need at least 7)!"
  help_usage
  exit 3
fi
129
# State filled in by the command line parser:
#   tt                    0 = no check type yet, 1 = --cpu, 2 = --mem, 3 = --cputime
#   process_name          process names, joined with "|" instead of ","
#   exclude_process_name  excluded process names, joined with "|"
#   wt / ct               raw warning / critical threshold strings
#   type_arg_aff          label used in the output (%CPU, %MEM, TIME)
#   type_arg              ps output field (pcpu, pmem, time)
#   delim                 field separator of the values ("." or ":")
tt=0
process_name=""
exclude_process_name=""
wt=""
ct=""

# Record the requested check type.
# Arguments: $1 code for tt, $2 display label, $3 ps field, $4 delimiter
# Returns:   1 (after printing a message) when a type was already chosen.
select_check_type() {
  if [ "$tt" -ne 0 ]; then
    echo "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
    return 1
  fi
  tt=$1
  type_arg_aff=$2
  type_arg=$3
  delim=$4
}

# Test of the command lines arguments
#
# Arguments: the plugin's command line. Valued options are accepted both as
#            "-o value" / "--option value" and as "--option=value".
# Globals:   writes the variables listed above.
# Outputs:   a message on stdout describing the first problem found.
# Returns:   0 on success, 1 on the first invalid argument.
parse_args() {
  local opt

  while [ "$#" -gt 0 ]; do
    opt=$1
    shift

    # "--option=value": put the value back in front of the remaining
    # arguments so that it is handled exactly like "--option value".
    case "$opt" in
      --*=*)
        set -- "${opt#*=}" "$@"
        opt=${opt%%=*}
        ;;
    esac

    # Every valued option needs something after it.
    case "$opt" in
      -p|--process|-e|--exclude-process|-w|--warning|-c|--critical)
        if [ "$#" -eq 0 ]; then
          echo "Missing value for argument $opt"
          return 1
        fi
        ;;
    esac

    case "$opt" in
      -p|--process)
        if [ -n "$process_name" ]; then
          echo "Only one --process argument is useful..."
          return 1
        fi
        # The comma separated list becomes a regex alternation.
        process_name=${1//,/|}
        shift
        ;;
      -e|--exclude-process)
        if [ -n "$exclude_process_name" ]; then
          echo "Only one --exclude-process argument is useful..."
          return 1
        fi
        exclude_process_name=${1//,/|}
        shift
        ;;
      -w|--warning)
        if [ -n "$wt" ]; then
          echo "Only one --warning argument needed... Trying to test bad things ? :-)"
          return 1
        fi
        wt=$1
        shift
        ;;
      -c|--critical)
        if [ -n "$ct" ]; then
          echo "Only one --critical argument needed... Trying to test bad things ? :-)"
          return 1
        fi
        ct=$1
        shift
        ;;
      --cpu)
        select_check_type 1 "%CPU" "pcpu" "." || return 1
        ;;
      --mem)
        select_check_type 2 "%MEM" "pmem" "." || return 1
        ;;
      --cputime)
        select_check_type 3 "TIME" "time" ":" || return 1
        ;;
      *)
        echo "Unknown argument $opt"
        return 1
        ;;
    esac
  done

  return 0
}

# Any parsing problem ends the plugin with the usage text and status 3.
if ! parse_args "$@"; then
  help_usage
  exit 3
fi
228
# Is the process running ?
# Skipped for the %all% / %max% keywords, which do not name a process.
if [ "$process_name" != "%all%" ] && [ "$process_name" != "%max%" ]; then
  if [ -z "$process_name" ]; then
    echo "No process name given !"
    help_usage
    exit 3
  fi
  # Only the command names are searched ("comm=" also drops the header line),
  # so a name can no longer match a PID, TTY or TIME column. The pattern is
  # the same extended regex as before, trailing "?" (last character optional)
  # included.
  if ! ps -e -o comm= | grep -E "$process_name?" > /dev/null; then
    # Exit status 3 means UNKNOWN, so the label now says so as well.
    echo "UNKNOWN: process $process_name not running !"
    exit 3
  fi
fi
235
# Cut of warning and critical values
#
# Without a check type there is no delimiter to split on; say so instead of
# blaming the WARNING field.
if [ -z "$delim" ]; then
  echo "One of the arguments --cpu/--mem/--cputime is required !"
  help_usage
  exit 3
fi

# Split one threshold string into three numeric fields.
# Arguments: $1 - threshold as given on the command line
# Globals:   reads tt; writes th_value1, th_value2, th_value3
# Returns:   0 when the string is well formed, 1 otherwise.
#
# --cputime expects exactly H:M:S. --cpu / --mem expect "N" or "N.D": a
# missing fraction counts as .0 and only the first decimal digit is kept,
# because the cumulated value is tracked in tenths. Numbers are normalised to
# base 10 so that leading zeros are harmless.
split_threshold() {
  local spec=$1
  local re_time='^([0-9]+):([0-9]+):([0-9]+)$'
  local re_pct='^([0-9]+)(\.([0-9])[0-9]*)?$'

  if [ "$tt" -eq 3 ]; then
    [[ $spec =~ $re_time ]] || return 1
    th_value1=$((10#${BASH_REMATCH[1]}))
    th_value2=$((10#${BASH_REMATCH[2]}))
    th_value3=$((10#${BASH_REMATCH[3]}))
  else
    [[ $spec =~ $re_pct ]] || return 1
    th_value1=$((10#${BASH_REMATCH[1]}))
    th_value2=${BASH_REMATCH[3]:-0}
    th_value3=0
  fi
  return 0
}

# Integrity check of warning and critical values
if ! split_threshold "$wt"; then
  echo "Bad expression in the WARNING field : $wt"
  help_usage
  exit 3
fi
wt_value1=$th_value1
wt_value2=$th_value2
wt_value3=$th_value3

if ! split_threshold "$ct"; then
  echo "Bad expression in the CRITICAL field : $ct"
  help_usage
  exit 3
fi
ct_value1=$th_value1
ct_value2=$th_value2
ct_value3=$th_value3
280
# ps line construction set...
#
# Builds psline, the whitespace separated list of values (one per selected
# process) that is summed up afterwards.

# The command column of ps holds a bare command name, so the plugin is
# recognised by the basename of $0 rather than by the full invocation path.
self_name=${myself##*/}
skip_regex=$self_name
if [ -n "$exclude_process_name" ]; then
  skip_regex="$self_name|$exclude_process_name?"
fi

# Print one "value command" line per process, without header line
# ("field=" gives the column an empty header).
ps_values_and_names() {
  ps -e -o "$type_arg=" -o comm=
}

# In the filters below "^ *[^ ]+ +" skips the value column, so the patterns
# are only matched against the command name.
case "$process_name" in
  %all%)
    psline=$(ps_values_and_names \
      | grep -E -v "^ *[^ ]+ +.*($skip_regex)" \
      | sed "s/^ *//" \
      | cut -d" " -f1)
    ;;
  %max%)
    # The lines are sorted as ps printed them (right aligned values); the C
    # locale keeps the leading blanks significant for the comparison.
    top_line=$(ps_values_and_names \
      | grep -E -v "^ *[^ ]+ +.*($skip_regex)" \
      | LC_ALL=C sort \
      | tail -1)
    top_line=${top_line#"${top_line%%[! ]*}"}   # trim leading blanks
    pstmp=${top_line#* }                        # drop the value column
    pstmp=${pstmp#"${pstmp%%[! ]*}"}            # trim blanks between columns
    # Collect the values of every process with exactly that command name.
    psline=$(ps_values_and_names | while read -r value name; do
      if [ "$name" = "$pstmp" ]; then
        printf '%s\n' "$value"
      fi
    done)
    process_name=$pstmp
    ;;
  *)
    psline=$(ps_values_and_names \
      | grep -E "^ *[^ ]+ +.*($process_name?)" \
      | sed "s/^ *//" \
      | cut -d" " -f1)
    ;;
esac
305
total1=0
total2=0
total3=0

# fetching the values
#
# Sum up every value listed in psline.
# Globals: reads psline (whitespace separated values) and tt (3 = cputime);
#          writes total1, total2, total3.
#   cputime : total1:total2:total3 is hours:minutes:seconds
#   cpu/mem : total1.total2 is the percentage with one decimal, total3 is 0
#
# Each value is converted to a single unit (seconds or tenths of a percent)
# before being added, so carries are always right. Accepted time forms are
# D-HH:MM:SS, HH:MM:SS and MM:SS; percentages may lack the decimal part.
# Values that match none of these forms are skipped.
accumulate_ps_values() {
  local item units=0
  local days hours mins secs
  local re_time='^(([0-9]+)-)?(([0-9]+):)?([0-9]+):([0-9]+)$'
  local re_pct='^([0-9]+)(\.([0-9])[0-9]*)?$'

  # psline is expanded unquoted on purpose: one word per process value.
  for item in $psline; do
    if [ "$tt" = "3" ]; then
      [[ $item =~ $re_time ]] || continue
      days=${BASH_REMATCH[2]:-0}
      hours=${BASH_REMATCH[4]:-0}
      mins=${BASH_REMATCH[5]}
      secs=${BASH_REMATCH[6]}
      # 10# forces base 10: fields such as "08" are not octal numbers.
      units=$(( units + 10#$days * 86400 + 10#$hours * 3600 + 10#$mins * 60 + 10#$secs ))
    else
      [[ $item =~ $re_pct ]] || continue
      units=$(( units + 10#${BASH_REMATCH[1]} * 10 + ${BASH_REMATCH[3]:-0} ))
    fi
  done

  if [ "$tt" = "3" ]; then
    total1=$(( units / 3600 ))
    total2=$(( (units % 3600) / 60 ))
    total3=$(( units % 60 ))
  else
    total1=$(( units / 10 ))
    total2=$(( units % 10 ))
    total3=0
  fi
}

accumulate_ps_values
346
warn=0
crit=0

# Tell whether a three field value has reached a three field limit.
# Arguments: $1 $2 $3 - value fields, $4 $5 $6 - limit fields
# Returns:   0 when the value is greater than or equal to the limit, the
#            fields being compared from left to right; non-zero otherwise.
value_reaches() {
  if [ "$1" -ne "$4" ]; then
    [ "$1" -gt "$4" ]
    return
  fi
  if [ "$2" -ne "$5" ]; then
    [ "$2" -gt "$5" ]
    return
  fi
  [ "$3" -ge "$6" ]
}

# Print $1, prefixed with "0" when it is a single character.
pad2() {
  case "$1" in
    ?) printf '0%s' "$1" ;;
    *) printf '%s' "$1" ;;
  esac
}

# evaluation of the cumulative values vs warning and critical values
#
# Globals: reads tt, total1..3, wt_value1..3, ct_value1..3;
#          writes warn, crit (0 or 1) and return_total (the value to display).
evaluate_thresholds() {
  warn=0
  crit=0
  case "$tt" in
    1|2)
      # Percentages only have two fields; the third one is fixed to 0.
      return_total="$total1.$total2"
      if value_reaches "$total1" "$total2" 0 "$ct_value1" "$ct_value2" 0; then
        crit=1
      fi
      if value_reaches "$total1" "$total2" 0 "$wt_value1" "$wt_value2" 0; then
        warn=1
      fi
      ;;
    3)
      return_total="$(pad2 "$total1"):$(pad2 "$total2"):$(pad2 "$total3")"
      if value_reaches "$total1" "$total2" "$total3" "$ct_value1" "$ct_value2" "$ct_value3"; then
        crit=1
      fi
      if value_reaches "$total1" "$total2" "$total3" "$wt_value1" "$wt_value2" "$wt_value3"; then
        warn=1
      fi
      ;;
  esac
}

evaluate_thresholds
376
# last check ...
#
# Tell whether the critical threshold lies below the warning threshold.
# Globals: reads ct_value1..3 and wt_value1..3 (unset fields count as 0).
# Returns: 0 when critical < warning, non-zero otherwise (equal thresholds
#          are accepted).
#
# The thresholds are compared with each other, so a wrong configuration is
# reported on every run and not only when the measured value happens to fall
# between the two thresholds.
thresholds_inverted() {
  if [ "${ct_value1:-0}" -ne "${wt_value1:-0}" ]; then
    [ "${ct_value1:-0}" -lt "${wt_value1:-0}" ]
    return
  fi
  if [ "${ct_value2:-0}" -ne "${wt_value2:-0}" ]; then
    [ "${ct_value2:-0}" -lt "${wt_value2:-0}" ]
    return
  fi
  [ "${ct_value3:-0}" -lt "${wt_value3:-0}" ]
}

if thresholds_inverted; then
  echo "Critical value must be greater than warning value !"
  help_usage
  exit 3
fi
384
# Finally Inform Nagios of what we found...
#
# One status line goes to stdout and the exit status carries the state:
# 2 when crit is set, else 1 when warn is set, else 0.

# The process list is shown comma separated again. The substitution is done
# by the shell itself so that the names are neither word-split nor
# glob-expanded on the way.
shown_process_name=${process_name//|/,}

if [ "$crit" -eq 1 ]; then
  state_label="CRITICAL"
  state_code=2
elif [ "$warn" -eq 1 ]; then
  state_label="WARNING"
  state_code=1
else
  state_label="OK"
  state_code=0
fi

echo "$state_label - total $type_arg_aff for process $shown_process_name : $return_total"
exit "$state_code"