diff options
Diffstat (limited to 'plugins/check_ntp_time.c')
-rw-r--r-- | plugins/check_ntp_time.c | 877 |
1 files changed, 877 insertions, 0 deletions
diff --git a/plugins/check_ntp_time.c b/plugins/check_ntp_time.c new file mode 100644 index 00000000..164d5190 --- /dev/null +++ b/plugins/check_ntp_time.c | |||
@@ -0,0 +1,877 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * Nagios check_ntp plugin | ||
4 | * | ||
5 | * License: GPL | ||
6 | * Copyright (c) 2006 sean finney <seanius@seanius.net> | ||
7 | * Copyright (c) 2007 nagios-plugins team | ||
8 | * | ||
9 | * Last Modified: $Date$ | ||
10 | * | ||
11 | * Description: | ||
12 | * | ||
13 | * This file contains the check_ntp plugin | ||
14 | * | ||
15 | * This plugin checks ntp servers independently of any command-line | ||
16 | * programs or external libraries. | ||
17 | * | ||
18 | * | ||
19 | * License Information: | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This program is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with this program; if not, write to the Free Software | ||
33 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
34 | |||
35 | $Id$ | ||
36 | |||
37 | *****************************************************************************/ | ||
38 | |||
/* Identification strings for this plugin; progname and revision are handed
 * to print_revision() when -V is given. */
const char *progname  = "check_ntp";
const char *revision  = "$Revision$";
const char *copyright = "2007";
const char *email     = "nagiosplug-devel@lists.sourceforge.net";
43 | |||
44 | #include "common.h" | ||
45 | #include "netutils.h" | ||
46 | #include "utils.h" | ||
47 | |||
48 | static char *server_address=NULL; | ||
49 | static int verbose=0; | ||
50 | static short do_offset=0; | ||
51 | static char *owarn="60"; | ||
52 | static char *ocrit="120"; | ||
53 | static short do_jitter=0; | ||
54 | static char *jwarn="5000"; | ||
55 | static char *jcrit="10000"; | ||
56 | |||
57 | int process_arguments (int, char **); | ||
58 | thresholds *offset_thresholds = NULL; | ||
59 | thresholds *jitter_thresholds = NULL; | ||
60 | void print_help (void); | ||
61 | void print_usage (void); | ||
62 | |||
/* how many samples to collect per server/peer before averaging */
#define AVG_NUM 4

/* largest data payload of a control message, in bytes */
#define MAX_CM_SIZE 468

/* an ntp request/response packet as per rfc1305 */
typedef struct {
	uint8_t  flags;      /* leap indicator, version and mode in one byte (see macros) */
	uint8_t  stratum;    /* clock stratum */
	int8_t   poll;       /* polling interval (printed as 2^poll) */
	int8_t   precision;  /* local clock precision (printed as 2^precision) */
	int32_t  rtdelay;    /* total round-trip delay, 32-bit fixed point (see macros) */
	uint32_t rtdisp;     /* max error to the primary source, same format */
	uint32_t refid;      /* reference clock identifier */
	uint64_t refts;      /* reference timestamp */
	uint64_t origts;     /* when the request left the client */
	uint64_t rxts;       /* when the request reached the server */
	uint64_t txts;       /* when the reply left the server */
} ntp_message;

/* what we remember about one server while measuring its offset */
typedef struct {
	time_t  waiting;          /* set when a request is sent, cleared on reply */
	int     num_responses;    /* replies received so far */
	uint8_t stratum;          /* taken as-is from the ntp_message */
	double  rtdelay;          /* ntp_message value converted to double */
	double  rtdisp;           /* ntp_message value converted to double */
	double  offset[AVG_NUM];  /* one computed offset per reply */
	uint8_t flags;            /* flags byte of the latest reply (see macros) */
} ntp_server_results;

/* an ntp control message as per rfc1305 */
typedef struct {
	uint8_t  flags;   /* leap indicator, version and mode in one byte (see macros) */
	uint8_t  op;      /* R,E,M bits plus the opcode */
	uint16_t seq;     /* packet sequence number */
	uint16_t status;  /* clock status */
	uint16_t assoc;   /* association id */
	uint16_t offset;  /* fragment offset, similar to a TCP sequence # */
	uint16_t count;   /* number of data bytes that follow */
	char data[MAX_CM_SIZE]; /* ASCII payload of the request/response */
	/* NB: data is not necessarily NUL terminated! */
} ntp_control_message;

/* one association-id/status-word pair from a control packet response */
typedef struct {
	uint16_t assoc;
	uint16_t status;
} ntp_assoc_status_pair;
113 | |||
/* All function-like macros below parenthesize their arguments so that
 * compound expressions (e.g. MODE(a|b)) are masked/shifted as a whole. */

/* bits 1,2 are the leap indicator */
#define LI_MASK 0xc0
#define LI(x) (((x)&LI_MASK)>>6)
#define LI_SET(x,y) do{ (x) |= (((y)<<6)&LI_MASK); }while(0)
/* and these are the values of the leap indicator */
#define LI_NOWARNING 0x00
#define LI_EXTRASEC 0x01
#define LI_MISSINGSEC 0x02
#define LI_ALARM 0x03
/* bits 3,4,5 are the ntp version */
#define VN_MASK 0x38
#define VN(x) (((x)&VN_MASK)>>3)
#define VN_SET(x,y) do{ (x) |= (((y)<<3)&VN_MASK); }while(0)
#define VN_RESERVED 0x02
/* bits 6,7,8 are the ntp mode */
#define MODE_MASK 0x07
#define MODE(x) ((x)&MODE_MASK)
#define MODE_SET(x,y) do{ (x) |= ((y)&MODE_MASK); }while(0)
/* here are some values */
#define MODE_CLIENT 0x03
#define MODE_CONTROLMSG 0x06
/* In control message, bits 8-10 are R,E,M bits */
#define REM_MASK 0xe0
#define REM_RESP 0x80
#define REM_ERROR 0x40
#define REM_MORE 0x20
/* In control message, bits 11 - 15 are opcode */
#define OP_MASK 0x1f
#define OP_SET(x,y) do{ (x) |= ((y)&OP_MASK); }while(0)
#define OP_READSTAT 0x01
#define OP_READVAR 0x02
/* In peer status bytes, bits 6,7,8 determine clock selection status.
 * x is the status word as received (network byte order). */
#define PEER_SEL(x) ((ntohs(x)>>8)&0x07)
#define PEER_INCLUDED 0x04
#define PEER_SYNCSOURCE 0x06
149 | |||
150 | /** | ||
151 | ** a note about the 32-bit "fixed point" numbers: | ||
152 | ** | ||
153 | they are divided into halves, each being a 16-bit int in network byte order: | ||
154 | - the first 16 bits are an int on the left side of a decimal point. | ||
155 | - the second 16 bits represent a fraction n/(2^16) | ||
156 | likewise for the 64-bit "fixed point" numbers with everything doubled :) | ||
157 | **/ | ||
158 | |||
/* macros to access the left/right 16 bits of a 32-bit ntp "fixed point"
   number (each half stays in network byte order). note that these can be
   used as lvalues too */
#define L16(x) (((uint16_t*)&(x))[0])
#define R16(x) (((uint16_t*)&(x))[1])
/* macros to access the left/right 32 bits of a 64-bit ntp "fixed point"
   number. these too can be used as lvalues */
#define L32(x) (((uint32_t*)&(x))[0])
#define R32(x) (((uint32_t*)&(x))[1])

/* ntp counts seconds since 1/1/1900, the unix epoch is 1/1/1970. this is
   the difference between the two, in seconds */
#define EPOCHDIFF 0x83aa7e80UL

/* extract a 32-bit ntp fixed point number into a double */
#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)

/* likewise for a 64-bit ntp fp number; the result is in seconds relative
   to the unix epoch, and an all-zero timestamp maps to 0 */
#define NTP64asDOUBLE(n) (double)(((uint64_t)(n))?\
	(ntohl(L32(n))-EPOCHDIFF) + \
	(.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
	0)

/* convert a struct timeval to a double */
#define TVasDOUBLE(x) (double)((x).tv_sec+(0.000001*(x).tv_usec))

/* convert an ntp 64-bit fp number to a struct timeval */
#define NTP64toTV(n,t) \
	do{ if(!(n)) (t).tv_sec = (t).tv_usec = 0; \
	    else { \
	        (t).tv_sec=ntohl(L32(n))-EPOCHDIFF; \
	        (t).tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \
	    } \
	}while(0)

/* convert a struct timeval to an ntp 64-bit fp number */
#define TVtoNTP64(t,n) \
	do{ if(!(t).tv_usec && !(t).tv_sec) (n)=0x0UL; \
	    else { \
	        L32(n)=htonl((t).tv_sec + EPOCHDIFF); \
	        R32(n)=htonl((uint64_t)((4294.967296*(t).tv_usec)+.5)); \
	    } \
	} while(0)
200 | |||
/* NTP control message header is 12 bytes, plus any data in the data
 * field, plus null padding to the nearest 32-bit boundary per rfc.
 * m.count is in network byte order. Padding is only added when the data
 * length is not already a multiple of 4, so a full MAX_CM_SIZE payload
 * yields exactly sizeof(ntp_control_message) and never more.
 */
#define SIZEOF_NTPCM(m) (12+ntohs((m).count)+((ntohs((m).count)%4)?4-(ntohs((m).count)%4):0))
205 | |||
/* debugging helpers.
 * NB: both expansions end in a ';' of their own, so they are complete
 * statements even when the call site omits the semicolon. */

/* run statement x only when verbose is greater than 1 */
#define DBG(x) do{if(verbose>1){ x; }}while(0);

/* print x as a dotted quad, most significant byte first */
#define PRINTSOCKADDR(x) \
	do{ \
		printf("%u.%u.%u.%u", (x>>24)&0xff, (x>>16)&0xff, (x>>8)&0xff, x&0xff);\
	}while(0);
212 | |||
213 | /* calculate the offset of the local clock */ | ||
214 | static inline double calc_offset(const ntp_message *m, const struct timeval *t){ | ||
215 | double client_tx, peer_rx, peer_tx, client_rx; | ||
216 | client_tx = NTP64asDOUBLE(m->origts); | ||
217 | peer_rx = NTP64asDOUBLE(m->rxts); | ||
218 | peer_tx = NTP64asDOUBLE(m->txts); | ||
219 | client_rx=TVasDOUBLE((*t)); | ||
220 | return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); | ||
221 | } | ||
222 | |||
223 | /* print out a ntp packet in human readable/debuggable format */ | ||
224 | void print_ntp_message(const ntp_message *p){ | ||
225 | struct timeval ref, orig, rx, tx; | ||
226 | |||
227 | NTP64toTV(p->refts,ref); | ||
228 | NTP64toTV(p->origts,orig); | ||
229 | NTP64toTV(p->rxts,rx); | ||
230 | NTP64toTV(p->txts,tx); | ||
231 | |||
232 | printf("packet contents:\n"); | ||
233 | printf("\tflags: 0x%.2x\n", p->flags); | ||
234 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
235 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
236 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
237 | printf("\tstratum = %d\n", p->stratum); | ||
238 | printf("\tpoll = %g\n", pow(2, p->poll)); | ||
239 | printf("\tprecision = %g\n", pow(2, p->precision)); | ||
240 | printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay)); | ||
241 | printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp)); | ||
242 | printf("\trefid = %x\n", p->refid); | ||
243 | printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts)); | ||
244 | printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts)); | ||
245 | printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts)); | ||
246 | printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); | ||
247 | } | ||
248 | |||
249 | void print_ntp_control_message(const ntp_control_message *p){ | ||
250 | int i=0, numpeers=0; | ||
251 | const ntp_assoc_status_pair *peer=NULL; | ||
252 | |||
253 | printf("control packet contents:\n"); | ||
254 | printf("\tflags: 0x%.2x , 0x%.2x\n", p->flags, p->op); | ||
255 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
256 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
257 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
258 | printf("\t response=%d (0x%.2x)\n", (p->op&REM_RESP)>0, p->op&REM_RESP); | ||
259 | printf("\t more=%d (0x%.2x)\n", (p->op&REM_MORE)>0, p->op&REM_MORE); | ||
260 | printf("\t error=%d (0x%.2x)\n", (p->op&REM_ERROR)>0, p->op&REM_ERROR); | ||
261 | printf("\t op=%d (0x%.2x)\n", p->op&OP_MASK, p->op&OP_MASK); | ||
262 | printf("\tsequence: %d (0x%.2x)\n", ntohs(p->seq), ntohs(p->seq)); | ||
263 | printf("\tstatus: %d (0x%.2x)\n", ntohs(p->status), ntohs(p->status)); | ||
264 | printf("\tassoc: %d (0x%.2x)\n", ntohs(p->assoc), ntohs(p->assoc)); | ||
265 | printf("\toffset: %d (0x%.2x)\n", ntohs(p->offset), ntohs(p->offset)); | ||
266 | printf("\tcount: %d (0x%.2x)\n", ntohs(p->count), ntohs(p->count)); | ||
267 | numpeers=ntohs(p->count)/(sizeof(ntp_assoc_status_pair)); | ||
268 | if(p->op&REM_RESP && p->op&OP_READSTAT){ | ||
269 | peer=(ntp_assoc_status_pair*)p->data; | ||
270 | for(i=0;i<numpeers;i++){ | ||
271 | printf("\tpeer id %.2x status %.2x", | ||
272 | ntohs(peer[i].assoc), ntohs(peer[i].status)); | ||
273 | if (PEER_SEL(peer[i].status) >= PEER_INCLUDED){ | ||
274 | if(PEER_SEL(peer[i].status) >= PEER_SYNCSOURCE){ | ||
275 | printf(" <-- current sync source"); | ||
276 | } else { | ||
277 | printf(" <-- current sync candidate"); | ||
278 | } | ||
279 | } | ||
280 | printf("\n"); | ||
281 | } | ||
282 | } | ||
283 | } | ||
284 | |||
285 | void setup_request(ntp_message *p){ | ||
286 | struct timeval t; | ||
287 | |||
288 | memset(p, 0, sizeof(ntp_message)); | ||
289 | LI_SET(p->flags, LI_ALARM); | ||
290 | VN_SET(p->flags, 4); | ||
291 | MODE_SET(p->flags, MODE_CLIENT); | ||
292 | p->poll=4; | ||
293 | p->precision=(int8_t)0xfa; | ||
294 | L16(p->rtdelay)=htons(1); | ||
295 | L16(p->rtdisp)=htons(1); | ||
296 | |||
297 | gettimeofday(&t, NULL); | ||
298 | TVtoNTP64(t,p->txts); | ||
299 | } | ||
300 | |||
301 | /* select the "best" server from a list of servers, and return its index. | ||
302 | * this is done by filtering servers based on stratum, dispersion, and | ||
303 | * finally round-trip delay. */ | ||
304 | int best_offset_server(const ntp_server_results *slist, int nservers){ | ||
305 | int i=0, j=0, cserver=0, candidates[5], csize=0; | ||
306 | |||
307 | /* for each server */ | ||
308 | for(cserver=0; cserver<nservers; cserver++){ | ||
309 | /* sort out servers with error flags */ | ||
310 | if ( LI(slist[cserver].flags) != LI_NOWARNING ){ | ||
311 | if (verbose) printf("discarding peer id %d: flags=%d\n", cserver, LI(slist[cserver].flags)); | ||
312 | break; | ||
313 | } | ||
314 | |||
315 | /* compare it to each of the servers already in the candidate list */ | ||
316 | for(i=0; i<csize; i++){ | ||
317 | /* does it have an equal or better stratum? */ | ||
318 | if(slist[cserver].stratum <= slist[i].stratum){ | ||
319 | /* does it have an equal or better dispersion? */ | ||
320 | if(slist[cserver].rtdisp <= slist[i].rtdisp){ | ||
321 | /* does it have a better rtdelay? */ | ||
322 | if(slist[cserver].rtdelay < slist[i].rtdelay){ | ||
323 | break; | ||
324 | } | ||
325 | } | ||
326 | } | ||
327 | } | ||
328 | |||
329 | /* if we haven't reached the current list's end, move everyone | ||
330 | * over one to the right, and insert the new candidate */ | ||
331 | if(i<csize){ | ||
332 | for(j=5; j>i; j--){ | ||
333 | candidates[j]=candidates[j-1]; | ||
334 | } | ||
335 | } | ||
336 | /* regardless, if they should be on the list... */ | ||
337 | if(i<5) { | ||
338 | candidates[i]=cserver; | ||
339 | if(csize<5) csize++; | ||
340 | /* otherwise discard the server */ | ||
341 | } else { | ||
342 | DBG(printf("discarding peer id %d\n", cserver)); | ||
343 | } | ||
344 | } | ||
345 | |||
346 | if(csize>0) { | ||
347 | DBG(printf("best server selected: peer %d\n", candidates[0])); | ||
348 | return candidates[0]; | ||
349 | } else { | ||
350 | DBG(printf("no peers meeting synchronization criteria :(\n")); | ||
351 | return -1; | ||
352 | } | ||
353 | } | ||
354 | |||
/* do everything we need to get the total average offset
 * - we use a certain amount of parallelization with poll() to ensure
 *   we don't waste time sitting around waiting for single packets.
 * - we also "manually" handle resolving host names and connecting, because
 *   we have to do it in a way that our lazy macros don't handle currently :(
 *
 *   host:   name or address to resolve; every address returned is queried
 *   status: set to STATE_UNKNOWN when no usable server could be selected,
 *           otherwise left untouched
 * returns the average of the offsets measured against the selected server
 * (0 when none was selected). Terminates the program through die() on
 * resolver/socket/allocation errors and when no response was read at all.
 *
 * Fixes over the previous version: the average now sums every collected
 * sample (it used to add sample 0 repeatedly), short/failed reads are no
 * longer counted as responses, and a selected server without responses no
 * longer leads to a division by zero. */
double offset_request(const char *host, int *status){
	int i=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0;
	int servers_completed=0, one_read=0, servers_readable=0, best_index=-1;
	ssize_t nread=0;
	time_t now_time=0, start_ts=0;
	ntp_message *req=NULL;
	double avg_offset=0.;
	struct timeval recv_time;
	struct addrinfo *ai=NULL, *ai_tmp=NULL, hints;
	struct pollfd *ufds=NULL;
	ntp_server_results *servers=NULL;

	/* setup hints to only return results from getaddrinfo that we'd like */
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = address_family;
	hints.ai_protocol = IPPROTO_UDP;
	hints.ai_socktype = SOCK_DGRAM;

	/* fill in ai with the list of hosts resolved by the host name */
	ga_result = getaddrinfo(host, "123", &hints, &ai);
	if(ga_result!=0){
		die(STATE_UNKNOWN, "error getting address for %s: %s\n",
		    host, gai_strerror(ga_result));
	}

	/* count the number of returned hosts, and allocate stuff accordingly */
	for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; }
	req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts);
	if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array");
	socklist=(int*)malloc(sizeof(int)*num_hosts);
	if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
	ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts);
	if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
	servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts);
	if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array");
	memset(servers, 0, sizeof(ntp_server_results)*num_hosts);

	/* setup each socket for writing, and the corresponding struct pollfd */
	ai_tmp=ai;
	for(i=0;ai_tmp;i++){
		socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP);
		if(socklist[i] == -1) {
			perror(NULL);
			die(STATE_UNKNOWN, "can not create new socket");
		}
		if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){
			die(STATE_UNKNOWN, "can't create socket connection");
		} else {
			ufds[i].fd=socklist[i];
			ufds[i].events=POLLIN;
			ufds[i].revents=0;
		}
		ai_tmp = ai_tmp->ai_next;
	}

	/* now do AVG_NUM checks to each host. we stop before timeout/2 seconds
	 * have passed in order to ensure post-processing and jitter time. */
	now_time=start_ts=time(NULL);
	while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){
		/* loop through each server and find each one which hasn't
		 * been touched in the past second or so and is still lacking
		 * some responses. for each of these servers, send a new request,
		 * and update the "waiting" timestamp with the current time. */
		now_time=time(NULL);

		for(i=0; i<num_hosts; i++){
			if(servers[i].waiting<now_time && servers[i].num_responses<AVG_NUM){
				if(verbose && servers[i].waiting != 0) printf("re-");
				if(verbose) printf("sending request to peer %d\n", i);
				setup_request(&req[i]);
				/* best effort: a lost request is re-sent on a later pass */
				write(socklist[i], &req[i], sizeof(ntp_message));
				servers[i].waiting=now_time;
				/* at most one request is sent per pass */
				break;
			}
		}

		/* quickly poll for any sockets with pending data */
		servers_readable=poll(ufds, num_hosts, 100);
		if(servers_readable==-1){
			perror("polling ntp sockets");
			die(STATE_UNKNOWN, "communication errors");
		}

		/* read from any sockets with pending data */
		for(i=0; servers_readable && i<num_hosts; i++){
			if(ufds[i].revents&POLLIN && servers[i].num_responses < AVG_NUM){
				if(verbose) {
					printf("response from peer %d: ", i);
				}

				nread=read(ufds[i].fd, &req[i], sizeof(ntp_message));
				gettimeofday(&recv_time, NULL);
				servers_readable--;
				if(nread < (ssize_t)sizeof(ntp_message)){
					/* error or truncated packet: req[i] does not hold
					 * a complete reply, so don't count it as one */
					if(verbose) printf("short or failed read, ignoring\n");
					continue;
				}
				DBG(print_ntp_message(&req[i]));
				respnum=servers[i].num_responses++;
				servers[i].offset[respnum]=calc_offset(&req[i], &recv_time);
				if(verbose) {
					printf("offset %.10g\n", servers[i].offset[respnum]);
				}
				servers[i].stratum=req[i].stratum;
				servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp);
				servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay);
				servers[i].waiting=0;
				servers[i].flags=req[i].flags;
				one_read = 1;
				if(servers[i].num_responses==AVG_NUM) servers_completed++;
			}
		}
		/* lather, rinse, repeat. */
	}

	if (one_read == 0) {
		die(STATE_CRITICAL, "NTP CRITICAL: No response from NTP server\n");
	}

	/* now, pick the best server from the list */
	best_index=best_offset_server(servers, num_hosts);
	if(best_index < 0 || servers[best_index].num_responses == 0){
		*status=STATE_UNKNOWN;
	} else {
		/* finally, calculate the average offset over all samples */
		for(i=0; i<servers[best_index].num_responses;i++){
			avg_offset+=servers[best_index].offset[i];
		}
		avg_offset/=servers[best_index].num_responses;
	}

	/* cleanup */
	/* FIXME: Not closing the socket to avoid re-use of the local port
	 * which can cause old NTP packets to be read instead of NTP control
	 * packets in jitter_request(). THERE MUST BE ANOTHER WAY...
	 * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */
	free(socklist);
	free(ufds);
	free(servers);
	free(req);
	freeaddrinfo(ai);

	if(verbose) printf("overall average offset: %.10g\n", avg_offset);
	return avg_offset;
}
502 | |||
503 | void | ||
504 | setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ | ||
505 | memset(p, 0, sizeof(ntp_control_message)); | ||
506 | LI_SET(p->flags, LI_NOWARNING); | ||
507 | VN_SET(p->flags, VN_RESERVED); | ||
508 | MODE_SET(p->flags, MODE_CONTROLMSG); | ||
509 | OP_SET(p->op, opcode); | ||
510 | p->seq = htons(seq); | ||
511 | /* Remaining fields are zero for requests */ | ||
512 | } | ||
513 | |||
514 | /* XXX handle responses with the error bit set */ | ||
515 | double jitter_request(const char *host, int *status){ | ||
516 | int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; | ||
517 | int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; | ||
518 | int peers_size=0, peer_offset=0; | ||
519 | ntp_assoc_status_pair *peers=NULL; | ||
520 | ntp_control_message req; | ||
521 | const char *getvar = "jitter"; | ||
522 | double rval = 0.0, jitter = -1.0; | ||
523 | char *startofvalue=NULL, *nptr=NULL; | ||
524 | void *tmp; | ||
525 | |||
526 | /* Long-winded explanation: | ||
527 | * Getting the jitter requires a number of steps: | ||
528 | * 1) Send a READSTAT request. | ||
529 | * 2) Interpret the READSTAT reply | ||
530 | * a) The data section contains a list of peer identifiers (16 bits) | ||
531 | * and associated status words (16 bits) | ||
532 | * b) We want the value of 0x06 in the SEL (peer selection) value, | ||
533 | * which means "current synchronizatin source". If that's missing, | ||
534 | * we take anything better than 0x04 (see the rfc for details) but | ||
535 | * set a minimum of warning. | ||
536 | * 3) Send a READVAR request for information on each peer identified | ||
537 | * in 2b greater than the minimum selection value. | ||
538 | * 4) Extract the jitter value from the data[] (it's ASCII) | ||
539 | */ | ||
540 | my_udp_connect(server_address, 123, &conn); | ||
541 | |||
542 | /* keep sending requests until the server stops setting the | ||
543 | * REM_MORE bit, though usually this is only 1 packet. */ | ||
544 | do{ | ||
545 | setup_control_request(&req, OP_READSTAT, 1); | ||
546 | DBG(printf("sending READSTAT request")); | ||
547 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
548 | DBG(print_ntp_control_message(&req)); | ||
549 | /* Attempt to read the largest size packet possible */ | ||
550 | req.count=htons(MAX_CM_SIZE); | ||
551 | DBG(printf("recieving READSTAT response")) | ||
552 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
553 | DBG(print_ntp_control_message(&req)); | ||
554 | /* Each peer identifier is 4 bytes in the data section, which | ||
555 | * we represent as a ntp_assoc_status_pair datatype. | ||
556 | */ | ||
557 | peers_size+=ntohs(req.count); | ||
558 | if((tmp=realloc(peers, peers_size)) == NULL) | ||
559 | free(peers), die(STATE_UNKNOWN, "can not (re)allocate 'peers' buffer\n"); | ||
560 | peers=tmp; | ||
561 | memcpy((void*)((ptrdiff_t)peers+peer_offset), (void*)req.data, ntohs(req.count)); | ||
562 | npeers=peers_size/sizeof(ntp_assoc_status_pair); | ||
563 | peer_offset+=ntohs(req.count); | ||
564 | } while(req.op&REM_MORE); | ||
565 | |||
566 | /* first, let's find out if we have a sync source, or if there are | ||
567 | * at least some candidates. in the case of the latter we'll issue | ||
568 | * a warning but go ahead with the check on them. */ | ||
569 | for (i = 0; i < npeers; i++){ | ||
570 | if (PEER_SEL(peers[i].status) >= PEER_INCLUDED){ | ||
571 | num_candidates++; | ||
572 | if(PEER_SEL(peers[i].status) >= PEER_SYNCSOURCE){ | ||
573 | syncsource_found=1; | ||
574 | min_peer_sel=PEER_SYNCSOURCE; | ||
575 | } | ||
576 | } | ||
577 | } | ||
578 | if(verbose) printf("%d candiate peers available\n", num_candidates); | ||
579 | if(verbose && syncsource_found) printf("synchronization source found\n"); | ||
580 | if(! syncsource_found){ | ||
581 | *status = STATE_UNKNOWN; | ||
582 | if(verbose) printf("warning: no synchronization source found\n"); | ||
583 | } | ||
584 | |||
585 | |||
586 | for (run=0; run<AVG_NUM; run++){ | ||
587 | if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); | ||
588 | for (i = 0; i < npeers; i++){ | ||
589 | /* Only query this server if it is the current sync source */ | ||
590 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ | ||
591 | num_selected++; | ||
592 | setup_control_request(&req, OP_READVAR, 2); | ||
593 | req.assoc = peers[i].assoc; | ||
594 | /* By spec, putting the variable name "jitter" in the request | ||
595 | * should cause the server to provide _only_ the jitter value. | ||
596 | * thus reducing net traffic, guaranteeing us only a single | ||
597 | * datagram in reply, and making intepretation much simpler | ||
598 | */ | ||
599 | /* Older servers doesn't know what jitter is, so if we get an | ||
600 | * error on the first pass we redo it with "dispersion" */ | ||
601 | strncpy(req.data, getvar, MAX_CM_SIZE-1); | ||
602 | req.count = htons(strlen(getvar)); | ||
603 | DBG(printf("sending READVAR request...\n")); | ||
604 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
605 | DBG(print_ntp_control_message(&req)); | ||
606 | |||
607 | req.count = htons(MAX_CM_SIZE); | ||
608 | DBG(printf("recieving READVAR response...\n")); | ||
609 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
610 | DBG(print_ntp_control_message(&req)); | ||
611 | |||
612 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { | ||
613 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | ||
614 | getvar = "dispersion"; | ||
615 | num_selected--; | ||
616 | i--; | ||
617 | continue; | ||
618 | } | ||
619 | |||
620 | /* get to the float value */ | ||
621 | if(verbose) { | ||
622 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); | ||
623 | } | ||
624 | startofvalue = strchr(req.data, '='); | ||
625 | if(startofvalue != NULL) { | ||
626 | startofvalue++; | ||
627 | jitter = strtod(startofvalue, &nptr); | ||
628 | } | ||
629 | if(startofvalue == NULL || startofvalue==nptr){ | ||
630 | printf("warning: unable to read server jitter response.\n"); | ||
631 | *status = STATE_UNKNOWN; | ||
632 | } else { | ||
633 | if(verbose) printf("%g\n", jitter); | ||
634 | num_valid++; | ||
635 | rval += jitter; | ||
636 | } | ||
637 | } | ||
638 | } | ||
639 | if(verbose){ | ||
640 | printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); | ||
641 | } | ||
642 | } | ||
643 | |||
644 | rval = num_valid ? rval / num_valid : -1.0; | ||
645 | |||
646 | close(conn); | ||
647 | if(peers!=NULL) free(peers); | ||
648 | /* If we return -1.0, it means no synchronization source was found */ | ||
649 | return rval; | ||
650 | } | ||
651 | |||
652 | int process_arguments(int argc, char **argv){ | ||
653 | int c; | ||
654 | int option=0; | ||
655 | static struct option longopts[] = { | ||
656 | {"version", no_argument, 0, 'V'}, | ||
657 | {"help", no_argument, 0, 'h'}, | ||
658 | {"verbose", no_argument, 0, 'v'}, | ||
659 | {"use-ipv4", no_argument, 0, '4'}, | ||
660 | {"use-ipv6", no_argument, 0, '6'}, | ||
661 | {"warning", required_argument, 0, 'w'}, | ||
662 | {"critical", required_argument, 0, 'c'}, | ||
663 | {"jwarn", required_argument, 0, 'j'}, | ||
664 | {"jcrit", required_argument, 0, 'k'}, | ||
665 | {"timeout", required_argument, 0, 't'}, | ||
666 | {"hostname", required_argument, 0, 'H'}, | ||
667 | {0, 0, 0, 0} | ||
668 | }; | ||
669 | |||
670 | |||
671 | if (argc < 2) | ||
672 | usage ("\n"); | ||
673 | |||
674 | while (1) { | ||
675 | c = getopt_long (argc, argv, "Vhv46w:c:j:k:t:H:", longopts, &option); | ||
676 | if (c == -1 || c == EOF || c == 1) | ||
677 | break; | ||
678 | |||
679 | switch (c) { | ||
680 | case 'h': | ||
681 | print_help(); | ||
682 | exit(STATE_OK); | ||
683 | break; | ||
684 | case 'V': | ||
685 | print_revision(progname, revision); | ||
686 | exit(STATE_OK); | ||
687 | break; | ||
688 | case 'v': | ||
689 | verbose++; | ||
690 | break; | ||
691 | case 'w': | ||
692 | do_offset=1; | ||
693 | owarn = optarg; | ||
694 | break; | ||
695 | case 'c': | ||
696 | do_offset=1; | ||
697 | ocrit = optarg; | ||
698 | break; | ||
699 | case 'j': | ||
700 | do_jitter=1; | ||
701 | jwarn = optarg; | ||
702 | break; | ||
703 | case 'k': | ||
704 | do_jitter=1; | ||
705 | jcrit = optarg; | ||
706 | break; | ||
707 | case 'H': | ||
708 | if(is_host(optarg) == FALSE) | ||
709 | usage2(_("Invalid hostname/address"), optarg); | ||
710 | server_address = strdup(optarg); | ||
711 | break; | ||
712 | case 't': | ||
713 | socket_timeout=atoi(optarg); | ||
714 | break; | ||
715 | case '4': | ||
716 | address_family = AF_INET; | ||
717 | break; | ||
718 | case '6': | ||
719 | #ifdef USE_IPV6 | ||
720 | address_family = AF_INET6; | ||
721 | #else | ||
722 | usage4 (_("IPv6 support not available")); | ||
723 | #endif | ||
724 | break; | ||
725 | case '?': | ||
726 | /* print short usage statement if args not parsable */ | ||
727 | usage5 (); | ||
728 | break; | ||
729 | } | ||
730 | } | ||
731 | |||
732 | if(server_address == NULL){ | ||
733 | usage4(_("Hostname was not supplied")); | ||
734 | } | ||
735 | |||
736 | return 0; | ||
737 | } | ||
738 | |||
739 | char *perfd_offset (double offset) | ||
740 | { | ||
741 | return fperfdata ("offset", offset, "s", | ||
742 | TRUE, offset_thresholds->warning->end, | ||
743 | TRUE, offset_thresholds->critical->end, | ||
744 | FALSE, 0, FALSE, 0); | ||
745 | } | ||
746 | |||
747 | char *perfd_jitter (double jitter) | ||
748 | { | ||
749 | return fperfdata ("jitter", jitter, "s", | ||
750 | do_jitter, jitter_thresholds->warning->end, | ||
751 | do_jitter, jitter_thresholds->critical->end, | ||
752 | TRUE, 0, FALSE, 0); | ||
753 | } | ||
754 | |||
755 | int main(int argc, char *argv[]){ | ||
756 | int result, offset_result, jitter_result; | ||
757 | double offset=0, jitter=0; | ||
758 | char *result_line, *perfdata_line; | ||
759 | |||
760 | result = offset_result = jitter_result = STATE_OK; | ||
761 | |||
762 | if (process_arguments (argc, argv) == ERROR) | ||
763 | usage4 (_("Could not parse arguments")); | ||
764 | |||
765 | set_thresholds(&offset_thresholds, owarn, ocrit); | ||
766 | set_thresholds(&jitter_thresholds, jwarn, jcrit); | ||
767 | |||
768 | /* initialize alarm signal handling */ | ||
769 | signal (SIGALRM, socket_timeout_alarm_handler); | ||
770 | |||
771 | /* set socket timeout */ | ||
772 | alarm (socket_timeout); | ||
773 | |||
774 | offset = offset_request(server_address, &offset_result); | ||
775 | /* check_ntp used to always return CRITICAL if offset_result == STATE_UNKNOWN. | ||
776 | * Now we'll only do that is the offset thresholds were set */ | ||
777 | if (do_offset && offset_result == STATE_UNKNOWN) { | ||
778 | result = STATE_CRITICAL; | ||
779 | } else { | ||
780 | result = get_status(fabs(offset), offset_thresholds); | ||
781 | } | ||
782 | |||
783 | /* If not told to check the jitter, we don't even send packets. | ||
784 | * jitter is checked using NTP control packets, which not all | ||
785 | * servers recognize. Trying to check the jitter on OpenNTPD | ||
786 | * (for example) will result in an error | ||
787 | */ | ||
788 | if(do_jitter){ | ||
789 | jitter=jitter_request(server_address, &jitter_result); | ||
790 | result = max_state_alt(result, get_status(jitter, jitter_thresholds)); | ||
791 | /* -1 indicates that we couldn't calculate the jitter | ||
792 | * Only overrides STATE_OK from the offset */ | ||
793 | if(jitter == -1.0 && result == STATE_OK) | ||
794 | result = STATE_UNKNOWN; | ||
795 | } | ||
796 | result = max_state_alt(result, jitter_result); | ||
797 | |||
798 | switch (result) { | ||
799 | case STATE_CRITICAL : | ||
800 | asprintf(&result_line, "NTP CRITICAL:"); | ||
801 | break; | ||
802 | case STATE_WARNING : | ||
803 | asprintf(&result_line, "NTP WARNING:"); | ||
804 | break; | ||
805 | case STATE_OK : | ||
806 | asprintf(&result_line, "NTP OK:"); | ||
807 | break; | ||
808 | default : | ||
809 | asprintf(&result_line, "NTP UNKNOWN:"); | ||
810 | break; | ||
811 | } | ||
812 | if(offset_result == STATE_UNKNOWN){ | ||
813 | asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); | ||
814 | asprintf(&perfdata_line, ""); | ||
815 | } else { | ||
816 | asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); | ||
817 | asprintf(&perfdata_line, "%s", perfd_offset(offset)); | ||
818 | } | ||
819 | if (do_jitter) { | ||
820 | asprintf(&result_line, "%s, jitter=%f", result_line, jitter); | ||
821 | asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); | ||
822 | } | ||
823 | printf("%s|%s\n", result_line, perfdata_line); | ||
824 | |||
825 | if(server_address!=NULL) free(server_address); | ||
826 | return result; | ||
827 | } | ||
828 | |||
829 | |||
830 | |||
831 | void print_help(void){ | ||
832 | print_revision(progname, revision); | ||
833 | |||
834 | printf ("Copyright (c) 2006 Sean Finney\n"); | ||
835 | printf (COPYRIGHT, copyright, email); | ||
836 | |||
837 | printf ("%s\n", _("This plugin checks the selected ntp server")); | ||
838 | |||
839 | printf ("\n\n"); | ||
840 | |||
841 | print_usage(); | ||
842 | printf (_(UT_HELP_VRSN)); | ||
843 | printf (_(UT_HOST_PORT), 'p', "123"); | ||
844 | printf (" %s\n", "-w, --warning=THRESHOLD"); | ||
845 | printf (" %s\n", _("Offset to result in warning status (seconds)")); | ||
846 | printf (" %s\n", "-c, --critical=THRESHOLD"); | ||
847 | printf (" %s\n", _("Offset to result in critical status (seconds)")); | ||
848 | printf (" %s\n", "-j, --warning=THRESHOLD"); | ||
849 | printf (" %s\n", _("Warning threshold for jitter")); | ||
850 | printf (" %s\n", "-k, --critical=THRESHOLD"); | ||
851 | printf (" %s\n", _("Critical threshold for jitter")); | ||
852 | printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT); | ||
853 | printf (_(UT_VERBOSE)); | ||
854 | |||
855 | printf("\n"); | ||
856 | printf("%s\n", _("Notes:")); | ||
857 | printf(" %s\n", _("See:")); | ||
858 | printf(" %s\n", ("http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT")); | ||
859 | printf(" %s\n", _("for THRESHOLD format and examples.")); | ||
860 | |||
861 | printf("\n"); | ||
862 | printf("%s\n", _("Examples:")); | ||
863 | printf(" %s\n", _("Normal offset check:")); | ||
864 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1")); | ||
865 | printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); | ||
866 | printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); | ||
867 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); | ||
868 | |||
869 | printf (_(UT_SUPPORT)); | ||
870 | } | ||
871 | |||
872 | void | ||
873 | print_usage(void) | ||
874 | { | ||
875 | printf (_("Usage:")); | ||
876 | printf(" %s -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n", progname); | ||
877 | } | ||