diff options
-rw-r--r-- | plugins/Makefile.am | 8 | ||||
-rw-r--r-- | plugins/check_ntpd.c | 464 | ||||
-rw-r--r-- | plugins/check_time_ntp.c | 6 | ||||
-rw-r--r-- | plugins/t/check_ntpd.t | 57 | ||||
-rw-r--r-- | plugins/t/check_time_ntp.t | 57 |
5 files changed, 240 insertions, 352 deletions
diff --git a/plugins/Makefile.am b/plugins/Makefile.am index c0486bc1..782721a5 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am | |||
@@ -14,8 +14,8 @@ MATHLIBS = @MATHLIBS@ | |||
14 | #AM_CFLAGS = -Wall | 14 | #AM_CFLAGS = -Wall |
15 | 15 | ||
16 | libexec_PROGRAMS = check_apt check_cluster check_disk check_dummy check_http check_load \ | 16 | libexec_PROGRAMS = check_apt check_cluster check_disk check_dummy check_http check_load \ |
17 | check_mrtg check_mrtgtraf check_ntp check_nwstat check_overcr check_ping \ | 17 | check_mrtg check_mrtgtraf check_ntp check_ntpd check_nwstat check_overcr check_ping \ |
18 | check_real check_smtp check_ssh check_tcp check_time \ | 18 | check_real check_smtp check_ssh check_tcp check_time check_time_ntp \ |
19 | check_ups check_users negate \ | 19 | check_ups check_users negate \ |
20 | urlize @EXTRAS@ | 20 | urlize @EXTRAS@ |
21 | 21 | ||
@@ -70,6 +70,7 @@ check_mysql_query_LDADD = $(NETLIBS) $(MYSQLLIBS) | |||
70 | check_nagios_LDADD = $(BASEOBJS) runcmd.o | 70 | check_nagios_LDADD = $(BASEOBJS) runcmd.o |
71 | check_nt_LDADD = $(NETLIBS) | 71 | check_nt_LDADD = $(NETLIBS) |
72 | check_ntp_LDADD = $(NETLIBS) $(MATHLIBS) | 72 | check_ntp_LDADD = $(NETLIBS) $(MATHLIBS) |
73 | check_ntpd_LDADD = $(NETLIBS) $(MATHLIBS) | ||
73 | check_nwstat_LDADD = $(NETLIBS) | 74 | check_nwstat_LDADD = $(NETLIBS) |
74 | check_overcr_LDADD = $(NETLIBS) | 75 | check_overcr_LDADD = $(NETLIBS) |
75 | check_pgsql_LDADD = $(NETLIBS) $(PGLIBS) | 76 | check_pgsql_LDADD = $(NETLIBS) $(PGLIBS) |
@@ -83,6 +84,7 @@ check_ssh_LDADD = $(NETLIBS) | |||
83 | check_swap_LDADD = $(MATHLIBS) $(BASEOBJS) popen.o | 84 | check_swap_LDADD = $(MATHLIBS) $(BASEOBJS) popen.o |
84 | check_tcp_LDADD = $(SSLOBJS) $(NETLIBS) $(SSLLIBS) | 85 | check_tcp_LDADD = $(SSLOBJS) $(NETLIBS) $(SSLLIBS) |
85 | check_time_LDADD = $(NETLIBS) | 86 | check_time_LDADD = $(NETLIBS) |
87 | check_time_ntp_LDADD = $(NETLIBS) $(MATHLIBS) | ||
86 | check_ups_LDADD = $(NETLIBS) | 88 | check_ups_LDADD = $(NETLIBS) |
87 | check_users_LDADD = $(BASEOBJS) popen.o | 89 | check_users_LDADD = $(BASEOBJS) popen.o |
88 | check_by_ssh_LDADD = $(NETLIBS) runcmd.o | 90 | check_by_ssh_LDADD = $(NETLIBS) runcmd.o |
@@ -110,6 +112,7 @@ check_mysql_query_DEPENDENCIES = check_mysql_query.c $(NETOBJS) $(DEPLIBS) | |||
110 | check_nagios_DEPENDENCIES = check_nagios.c $(BASEOBJS) runcmd.o $(DEPLIBS) | 112 | check_nagios_DEPENDENCIES = check_nagios.c $(BASEOBJS) runcmd.o $(DEPLIBS) |
111 | check_nt_DEPENDENCIES = check_nt.c $(NETOBJS) $(DEPLIBS) | 113 | check_nt_DEPENDENCIES = check_nt.c $(NETOBJS) $(DEPLIBS) |
112 | check_ntp_DEPENDENCIES = check_ntp.c $(NETOBJS) $(DEPLIBS) | 114 | check_ntp_DEPENDENCIES = check_ntp.c $(NETOBJS) $(DEPLIBS) |
115 | check_ntpd_DEPENDENCIES = check_ntpd.c $(NETOBJS) $(DEPLIBS) | ||
113 | check_nwstat_DEPENDENCIES = check_nwstat.c $(NETOBJS) $(DEPLIBS) | 116 | check_nwstat_DEPENDENCIES = check_nwstat.c $(NETOBJS) $(DEPLIBS) |
114 | check_overcr_DEPENDENCIES = check_overcr.c $(NETOBJS) $(DEPLIBS) | 117 | check_overcr_DEPENDENCIES = check_overcr.c $(NETOBJS) $(DEPLIBS) |
115 | check_pgsql_DEPENDENCIES = check_pgsql.c $(NETOBJS) $(DEPLIBS) | 118 | check_pgsql_DEPENDENCIES = check_pgsql.c $(NETOBJS) $(DEPLIBS) |
@@ -123,6 +126,7 @@ check_ssh_DEPENDENCIES = check_ssh.c $(NETOBJS) $(DEPLIBS) | |||
123 | check_swap_DEPENDENCIES = check_swap.c $(BASEOBJS) popen.o $(DEPLIBS) | 126 | check_swap_DEPENDENCIES = check_swap.c $(BASEOBJS) popen.o $(DEPLIBS) |
124 | check_tcp_DEPENDENCIES = check_tcp.c $(SSLOBJS) $(NETOBJS) $(DEPLIBS) | 127 | check_tcp_DEPENDENCIES = check_tcp.c $(SSLOBJS) $(NETOBJS) $(DEPLIBS) |
125 | check_time_DEPENDENCIES = check_time.c $(NETOBJS) $(DEPLIBS) | 128 | check_time_DEPENDENCIES = check_time.c $(NETOBJS) $(DEPLIBS) |
129 | check_time_ntp_DEPENDENCIES = check_time_ntp.c $(NETOBJS) $(DEPLIBS) | ||
126 | check_ups_DEPENDENCIES = check_ups.c $(NETOBJS) $(DEPLIBS) | 130 | check_ups_DEPENDENCIES = check_ups.c $(NETOBJS) $(DEPLIBS) |
127 | check_users_DEPENDENCIES = check_users.c $(BASEOBJS) popen.o $(DEPLIBS) | 131 | check_users_DEPENDENCIES = check_users.c $(BASEOBJS) popen.o $(DEPLIBS) |
128 | check_by_ssh_DEPENDENCIES = check_by_ssh.c $(NETOBJS) runcmd.o $(DEPLIBS) | 132 | check_by_ssh_DEPENDENCIES = check_by_ssh.c $(NETOBJS) runcmd.o $(DEPLIBS) |
diff --git a/plugins/check_ntpd.c b/plugins/check_ntpd.c index df6e02f8..61c678c0 100644 --- a/plugins/check_ntpd.c +++ b/plugins/check_ntpd.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * | 10 | * |
11 | * Description: | 11 | * Description: |
12 | * | 12 | * |
13 | * This file contains the check_ntp plugin | 13 | * This file contains the check_ntpd plugin |
14 | * | 14 | * |
15 | * This plugin to check ntp servers independant of any commandline | 15 | * This plugin to check ntp servers independant of any commandline |
16 | * programs or external libraries. | 16 | * programs or external libraries. |
@@ -36,7 +36,7 @@ | |||
36 | 36 | ||
37 | *****************************************************************************/ | 37 | *****************************************************************************/ |
38 | 38 | ||
39 | const char *progname = "check_ntp"; | 39 | const char *progname = "check_ntpd"; |
40 | const char *revision = "$Revision$"; | 40 | const char *revision = "$Revision$"; |
41 | const char *copyright = "2007"; | 41 | const char *copyright = "2007"; |
42 | const char *email = "nagiosplug-devel@lists.sourceforge.net"; | 42 | const char *email = "nagiosplug-devel@lists.sourceforge.net"; |
@@ -56,6 +56,7 @@ static char *scrit="16"; | |||
56 | static short do_jitter=0; | 56 | static short do_jitter=0; |
57 | static char *jwarn="5000"; | 57 | static char *jwarn="5000"; |
58 | static char *jcrit="10000"; | 58 | static char *jcrit="10000"; |
59 | static int syncsource_found=0; | ||
59 | 60 | ||
60 | int process_arguments (int, char **); | 61 | int process_arguments (int, char **); |
61 | thresholds *offset_thresholds = NULL; | 62 | thresholds *offset_thresholds = NULL; |
@@ -64,9 +65,6 @@ thresholds *stratum_thresholds = NULL; | |||
64 | void print_help (void); | 65 | void print_help (void); |
65 | void print_usage (void); | 66 | void print_usage (void); |
66 | 67 | ||
67 | /* number of times to perform each request to get a good average. */ | ||
68 | #define AVG_NUM 4 | ||
69 | |||
70 | /* max size of control message data */ | 68 | /* max size of control message data */ |
71 | #define MAX_CM_SIZE 468 | 69 | #define MAX_CM_SIZE 468 |
72 | 70 | ||
@@ -85,17 +83,6 @@ typedef struct { | |||
85 | uint64_t txts; /* time at which request departed server */ | 83 | uint64_t txts; /* time at which request departed server */ |
86 | } ntp_message; | 84 | } ntp_message; |
87 | 85 | ||
88 | /* this structure holds data about results from querying offset from a peer */ | ||
89 | typedef struct { | ||
90 | time_t waiting; /* ts set when we started waiting for a response */ | ||
91 | int num_responses; /* number of successfully recieved responses */ | ||
92 | uint8_t stratum; /* copied verbatim from the ntp_message */ | ||
93 | double rtdelay; /* converted from the ntp_message */ | ||
94 | double rtdisp; /* converted from the ntp_message */ | ||
95 | double offset[AVG_NUM]; /* offsets from each response */ | ||
96 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | ||
97 | } ntp_server_results; | ||
98 | |||
99 | /* this structure holds everything in an ntp control message as per rfc1305 */ | 86 | /* this structure holds everything in an ntp control message as per rfc1305 */ |
100 | typedef struct { | 87 | typedef struct { |
101 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | 88 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ |
@@ -224,32 +211,6 @@ static inline double calc_offset(const ntp_message *m, const struct timeval *t){ | |||
224 | return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); | 211 | return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); |
225 | } | 212 | } |
226 | 213 | ||
227 | /* print out a ntp packet in human readable/debuggable format */ | ||
228 | void print_ntp_message(const ntp_message *p){ | ||
229 | struct timeval ref, orig, rx, tx; | ||
230 | |||
231 | NTP64toTV(p->refts,ref); | ||
232 | NTP64toTV(p->origts,orig); | ||
233 | NTP64toTV(p->rxts,rx); | ||
234 | NTP64toTV(p->txts,tx); | ||
235 | |||
236 | printf("packet contents:\n"); | ||
237 | printf("\tflags: 0x%.2x\n", p->flags); | ||
238 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
239 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
240 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
241 | printf("\tstratum = %d\n", p->stratum); | ||
242 | printf("\tpoll = %g\n", pow(2, p->poll)); | ||
243 | printf("\tprecision = %g\n", pow(2, p->precision)); | ||
244 | printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay)); | ||
245 | printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp)); | ||
246 | printf("\trefid = %x\n", p->refid); | ||
247 | printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts)); | ||
248 | printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts)); | ||
249 | printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts)); | ||
250 | printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); | ||
251 | } | ||
252 | |||
253 | void print_ntp_control_message(const ntp_control_message *p){ | 214 | void print_ntp_control_message(const ntp_control_message *p){ |
254 | int i=0, numpeers=0; | 215 | int i=0, numpeers=0; |
255 | const ntp_assoc_status_pair *peer=NULL; | 216 | const ntp_assoc_status_pair *peer=NULL; |
@@ -285,224 +246,22 @@ void print_ntp_control_message(const ntp_control_message *p){ | |||
285 | } | 246 | } |
286 | } | 247 | } |
287 | } | 248 | } |
288 | 249 | char * | |
289 | void setup_request(ntp_message *p){ | 250 | extract_value(const char *varlist, const char *name){ |
290 | struct timeval t; | 251 | char *tmpvarlist=NULL, *tmpkey=NULL, *value=NULL; |
291 | 252 | int last=0; | |
292 | memset(p, 0, sizeof(ntp_message)); | 253 | |
293 | LI_SET(p->flags, LI_ALARM); | 254 | tmpvarlist = strdup(varlist); |
294 | VN_SET(p->flags, 4); | 255 | tmpkey = strtok(tmpvarlist, "="); |
295 | MODE_SET(p->flags, MODE_CLIENT); | 256 | |
296 | p->poll=4; | 257 | do { |
297 | p->precision=(int8_t)0xfa; | 258 | if(strstr(tmpkey, name) != NULL) { |
298 | L16(p->rtdelay)=htons(1); | 259 | value = strtok(NULL, ","); |
299 | L16(p->rtdisp)=htons(1); | 260 | last = 1; |
300 | |||
301 | gettimeofday(&t, NULL); | ||
302 | TVtoNTP64(t,p->txts); | ||
303 | } | ||
304 | |||
305 | /* select the "best" server from a list of servers, and return its index. | ||
306 | * this is done by filtering servers based on stratum, dispersion, and | ||
307 | * finally round-trip delay. */ | ||
308 | int best_offset_server(const ntp_server_results *slist, int nservers){ | ||
309 | int i=0, j=0, cserver=0, candidates[5], csize=0; | ||
310 | |||
311 | /* for each server */ | ||
312 | for(cserver=0; cserver<nservers; cserver++){ | ||
313 | /* sort out servers with error flags */ | ||
314 | if ( LI(slist[cserver].flags) != LI_NOWARNING ){ | ||
315 | if (verbose) printf("discarding peer id %d: flags=%d\n", cserver, LI(slist[cserver].flags)); | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | /* compare it to each of the servers already in the candidate list */ | ||
320 | for(i=0; i<csize; i++){ | ||
321 | /* does it have an equal or better stratum? */ | ||
322 | if(slist[cserver].stratum <= slist[i].stratum){ | ||
323 | /* does it have an equal or better dispersion? */ | ||
324 | if(slist[cserver].rtdisp <= slist[i].rtdisp){ | ||
325 | /* does it have a better rtdelay? */ | ||
326 | if(slist[cserver].rtdelay < slist[i].rtdelay){ | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | |||
333 | /* if we haven't reached the current list's end, move everyone | ||
334 | * over one to the right, and insert the new candidate */ | ||
335 | if(i<csize){ | ||
336 | for(j=5; j>i; j--){ | ||
337 | candidates[j]=candidates[j-1]; | ||
338 | } | ||
339 | } | ||
340 | /* regardless, if they should be on the list... */ | ||
341 | if(i<5) { | ||
342 | candidates[i]=cserver; | ||
343 | if(csize<5) csize++; | ||
344 | /* otherwise discard the server */ | ||
345 | } else { | ||
346 | DBG(printf("discarding peer id %d\n", cserver)); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | if(csize>0) { | ||
351 | DBG(printf("best server selected: peer %d\n", candidates[0])); | ||
352 | return candidates[0]; | ||
353 | } else { | ||
354 | DBG(printf("no peers meeting synchronization criteria :(\n")); | ||
355 | return -1; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | /* do everything we need to get the total average offset | ||
360 | * - we use a certain amount of parallelization with poll() to ensure | ||
361 | * we don't waste time sitting around waiting for single packets. | ||
362 | * - we also "manually" handle resolving host names and connecting, because | ||
363 | * we have to do it in a way that our lazy macros don't handle currently :( */ | ||
364 | double offset_request(const char *host, int *stratum, int *status){ | ||
365 | int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0; | ||
366 | int servers_completed=0, one_written=0, one_read=0, servers_readable=0, best_index=-1; | ||
367 | time_t now_time=0, start_ts=0; | ||
368 | ntp_message *req=NULL; | ||
369 | double avg_offset=0.; | ||
370 | struct timeval recv_time; | ||
371 | struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; | ||
372 | struct pollfd *ufds=NULL; | ||
373 | ntp_server_results *servers=NULL; | ||
374 | |||
375 | /* setup hints to only return results from getaddrinfo that we'd like */ | ||
376 | memset(&hints, 0, sizeof(struct addrinfo)); | ||
377 | hints.ai_family = address_family; | ||
378 | hints.ai_protocol = IPPROTO_UDP; | ||
379 | hints.ai_socktype = SOCK_DGRAM; | ||
380 | |||
381 | /* fill in ai with the list of hosts resolved by the host name */ | ||
382 | ga_result = getaddrinfo(host, "123", &hints, &ai); | ||
383 | if(ga_result!=0){ | ||
384 | die(STATE_UNKNOWN, "error getting address for %s: %s\n", | ||
385 | host, gai_strerror(ga_result)); | ||
386 | } | ||
387 | |||
388 | /* count the number of returned hosts, and allocate stuff accordingly */ | ||
389 | for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; } | ||
390 | req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts); | ||
391 | if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array"); | ||
392 | socklist=(int*)malloc(sizeof(int)*num_hosts); | ||
393 | if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | ||
394 | ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts); | ||
395 | if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | ||
396 | servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts); | ||
397 | if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array"); | ||
398 | memset(servers, 0, sizeof(ntp_server_results)*num_hosts); | ||
399 | |||
400 | /* setup each socket for writing, and the corresponding struct pollfd */ | ||
401 | ai_tmp=ai; | ||
402 | for(i=0;ai_tmp;i++){ | ||
403 | socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); | ||
404 | if(socklist[i] == -1) { | ||
405 | perror(NULL); | ||
406 | die(STATE_UNKNOWN, "can not create new socket"); | ||
407 | } | ||
408 | if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ | ||
409 | die(STATE_UNKNOWN, "can't create socket connection"); | ||
410 | } else { | ||
411 | ufds[i].fd=socklist[i]; | ||
412 | ufds[i].events=POLLIN; | ||
413 | ufds[i].revents=0; | ||
414 | } | ||
415 | ai_tmp = ai_tmp->ai_next; | ||
416 | } | ||
417 | |||
418 | /* now do AVG_NUM checks to each host. we stop before timeout/2 seconds | ||
419 | * have passed in order to ensure post-processing and jitter time. */ | ||
420 | now_time=start_ts=time(NULL); | ||
421 | while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){ | ||
422 | /* loop through each server and find each one which hasn't | ||
423 | * been touched in the past second or so and is still lacking | ||
424 | * some responses. for each of these servers, send a new request, | ||
425 | * and update the "waiting" timestamp with the current time. */ | ||
426 | one_written=0; | ||
427 | now_time=time(NULL); | ||
428 | |||
429 | for(i=0; i<num_hosts; i++){ | ||
430 | if(servers[i].waiting<now_time && servers[i].num_responses<AVG_NUM){ | ||
431 | if(verbose && servers[i].waiting != 0) printf("re-"); | ||
432 | if(verbose) printf("sending request to peer %d\n", i); | ||
433 | setup_request(&req[i]); | ||
434 | write(socklist[i], &req[i], sizeof(ntp_message)); | ||
435 | servers[i].waiting=now_time; | ||
436 | one_written=1; | ||
437 | break; | ||
438 | } | ||
439 | } | ||
440 | |||
441 | /* quickly poll for any sockets with pending data */ | ||
442 | servers_readable=poll(ufds, num_hosts, 100); | ||
443 | if(servers_readable==-1){ | ||
444 | perror("polling ntp sockets"); | ||
445 | die(STATE_UNKNOWN, "communication errors"); | ||
446 | } | ||
447 | |||
448 | /* read from any sockets with pending data */ | ||
449 | for(i=0; servers_readable && i<num_hosts; i++){ | ||
450 | if(ufds[i].revents&POLLIN && servers[i].num_responses < AVG_NUM){ | ||
451 | if(verbose) { | ||
452 | printf("response from peer %d: ", i); | ||
453 | } | ||
454 | |||
455 | read(ufds[i].fd, &req[i], sizeof(ntp_message)); | ||
456 | gettimeofday(&recv_time, NULL); | ||
457 | DBG(print_ntp_message(&req[i])); | ||
458 | respnum=servers[i].num_responses++; | ||
459 | servers[i].offset[respnum]=calc_offset(&req[i], &recv_time); | ||
460 | if(verbose) { | ||
461 | printf("offset %.10g, stratum %i\n", servers[i].offset[respnum], req[i].stratum); | ||
462 | } | ||
463 | servers[i].stratum=req[i].stratum; | ||
464 | servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp); | ||
465 | servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay); | ||
466 | servers[i].waiting=0; | ||
467 | servers[i].flags=req[i].flags; | ||
468 | servers_readable--; | ||
469 | one_read = 1; | ||
470 | if(servers[i].num_responses==AVG_NUM) servers_completed++; | ||
471 | } | ||
472 | } | ||
473 | /* lather, rinse, repeat. */ | ||
474 | } | ||
475 | |||
476 | if (one_read == 0) { | ||
477 | die(STATE_CRITICAL, "NTP CRITICAL: No response from NTP server\n"); | ||
478 | } | ||
479 | |||
480 | /* now, pick the best server from the list */ | ||
481 | best_index=best_offset_server(servers, num_hosts); | ||
482 | if(best_index < 0){ | ||
483 | *status=STATE_UNKNOWN; | ||
484 | } else { | ||
485 | /* finally, calculate the average offset */ | ||
486 | for(i=0; i<servers[best_index].num_responses;i++){ | ||
487 | avg_offset+=servers[best_index].offset[j]; | ||
488 | } | 261 | } |
489 | avg_offset/=servers[best_index].num_responses; | 262 | } while (last == 0 && (tmpkey = strtok(NULL, "="))); |
490 | *stratum = servers[best_index].stratum; | ||
491 | } | ||
492 | 263 | ||
493 | /* cleanup */ | 264 | return value; |
494 | /* FIXME: Not closing the socket to avoid re-use of the local port | ||
495 | * which can cause old NTP packets to be read instead of NTP control | ||
496 | * pactets in jitter_request(). THERE MUST BE ANOTHER WAY... | ||
497 | * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */ | ||
498 | free(socklist); | ||
499 | free(ufds); | ||
500 | free(servers); | ||
501 | free(req); | ||
502 | freeaddrinfo(ai); | ||
503 | |||
504 | if(verbose) printf("overall average offset: %.10g\n", avg_offset); | ||
505 | return avg_offset; | ||
506 | } | 265 | } |
507 | 266 | ||
508 | void | 267 | void |
@@ -517,19 +276,22 @@ setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ | |||
517 | } | 276 | } |
518 | 277 | ||
519 | /* XXX handle responses with the error bit set */ | 278 | /* XXX handle responses with the error bit set */ |
520 | double jitter_request(const char *host, int *status){ | 279 | int ntp_request(const char *host, double *offset, int *offset_result, double *jitter, int *stratum){ |
521 | int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; | 280 | int conn=-1, i, npeers=0, num_candidates=0; |
522 | int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; | 281 | int min_peer_sel=PEER_INCLUDED; |
523 | int peers_size=0, peer_offset=0; | 282 | int peers_size=0, peer_offset=0; |
283 | int status; | ||
524 | ntp_assoc_status_pair *peers=NULL; | 284 | ntp_assoc_status_pair *peers=NULL; |
525 | ntp_control_message req; | 285 | ntp_control_message req; |
526 | const char *getvar = "jitter"; | 286 | const char *getvar = "stratum,offset,jitter"; |
527 | double rval = 0.0, jitter = -1.0; | 287 | char *value=NULL, *nptr=NULL; |
528 | char *startofvalue=NULL, *nptr=NULL; | ||
529 | void *tmp; | 288 | void *tmp; |
530 | 289 | ||
290 | *offset_result = STATE_UNKNOWN; | ||
291 | *jitter = *stratum = -1; | ||
292 | |||
531 | /* Long-winded explanation: | 293 | /* Long-winded explanation: |
532 | * Getting the jitter requires a number of steps: | 294 | * Getting the offset, jitter and stratum requires a number of steps: |
533 | * 1) Send a READSTAT request. | 295 | * 1) Send a READSTAT request. |
534 | * 2) Interpret the READSTAT reply | 296 | * 2) Interpret the READSTAT reply |
535 | * a) The data section contains a list of peer identifiers (16 bits) | 297 | * a) The data section contains a list of peer identifiers (16 bits) |
@@ -540,7 +302,8 @@ double jitter_request(const char *host, int *status){ | |||
540 | * set a minimum of warning. | 302 | * set a minimum of warning. |
541 | * 3) Send a READVAR request for information on each peer identified | 303 | * 3) Send a READVAR request for information on each peer identified |
542 | * in 2b greater than the minimum selection value. | 304 | * in 2b greater than the minimum selection value. |
543 | * 4) Extract the jitter value from the data[] (it's ASCII) | 305 | * 4) Extract the offset, jitter and stratum value from the data[] |
306 | * (it's ASCII) | ||
544 | */ | 307 | */ |
545 | my_udp_connect(server_address, 123, &conn); | 308 | my_udp_connect(server_address, 123, &conn); |
546 | 309 | ||
@@ -583,75 +346,100 @@ double jitter_request(const char *host, int *status){ | |||
583 | if(verbose) printf("%d candiate peers available\n", num_candidates); | 346 | if(verbose) printf("%d candiate peers available\n", num_candidates); |
584 | if(verbose && syncsource_found) printf("synchronization source found\n"); | 347 | if(verbose && syncsource_found) printf("synchronization source found\n"); |
585 | if(! syncsource_found){ | 348 | if(! syncsource_found){ |
586 | *status = STATE_UNKNOWN; | 349 | status = STATE_WARNING; |
587 | if(verbose) printf("warning: no synchronization source found\n"); | 350 | if(verbose) printf("warning: no synchronization source found\n"); |
588 | } | 351 | } |
589 | 352 | ||
590 | 353 | ||
591 | for (run=0; run<AVG_NUM; run++){ | 354 | for (i = 0; i < npeers; i++){ |
592 | if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); | 355 | /* Only query this server if it is the current sync source */ |
593 | for (i = 0; i < npeers; i++){ | 356 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ |
594 | /* Only query this server if it is the current sync source */ | 357 | if(verbose) printf("Getting offset, jitter and stratum for peer %.2x\n", ntohs(peers[i].assoc)); |
595 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ | 358 | setup_control_request(&req, OP_READVAR, 2); |
596 | num_selected++; | 359 | req.assoc = peers[i].assoc; |
597 | setup_control_request(&req, OP_READVAR, 2); | 360 | /* Putting the wanted variable names in the request |
598 | req.assoc = peers[i].assoc; | 361 | * cause the server to provide _only_ the requested values. |
599 | /* By spec, putting the variable name "jitter" in the request | 362 | * thus reducing net traffic, guaranteeing us only a single |
600 | * should cause the server to provide _only_ the jitter value. | 363 | * datagram in reply, and making intepretation much simpler |
601 | * thus reducing net traffic, guaranteeing us only a single | 364 | */ |
602 | * datagram in reply, and making intepretation much simpler | 365 | /* Older servers doesn't know what jitter is, so if we get an |
603 | */ | 366 | * error on the first pass we redo it with "dispersion" */ |
604 | /* Older servers doesn't know what jitter is, so if we get an | 367 | strncpy(req.data, getvar, MAX_CM_SIZE-1); |
605 | * error on the first pass we redo it with "dispersion" */ | 368 | req.count = htons(strlen(getvar)); |
606 | strncpy(req.data, getvar, MAX_CM_SIZE-1); | 369 | DBG(printf("sending READVAR request...\n")); |
607 | req.count = htons(strlen(getvar)); | 370 | write(conn, &req, SIZEOF_NTPCM(req)); |
608 | DBG(printf("sending READVAR request...\n")); | 371 | DBG(print_ntp_control_message(&req)); |
609 | write(conn, &req, SIZEOF_NTPCM(req)); | 372 | |
610 | DBG(print_ntp_control_message(&req)); | 373 | req.count = htons(MAX_CM_SIZE); |
611 | 374 | DBG(printf("recieving READVAR response...\n")); | |
612 | req.count = htons(MAX_CM_SIZE); | 375 | read(conn, &req, SIZEOF_NTPCM(req)); |
613 | DBG(printf("recieving READVAR response...\n")); | 376 | DBG(print_ntp_control_message(&req)); |
614 | read(conn, &req, SIZEOF_NTPCM(req)); | 377 | |
615 | DBG(print_ntp_control_message(&req)); | 378 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { |
616 | 379 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | |
617 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { | 380 | getvar = "stratum,offset,dispersion"; |
618 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | 381 | continue; |
619 | getvar = "dispersion"; | 382 | } |
620 | num_selected--; | ||
621 | i--; | ||
622 | continue; | ||
623 | } | ||
624 | 383 | ||
625 | /* get to the float value */ | 384 | if(verbose) |
385 | printf("Server responded: >>>%s<<<\n", req.data); | ||
386 | |||
387 | /* get the offset */ | ||
388 | if(verbose) | ||
389 | printf("parsing offset from peer %.2x: ", ntohs(peers[i].assoc)); | ||
390 | |||
391 | value = extract_value(req.data, "offset"); | ||
392 | //value = extract_value(req.data, "jitter="); | ||
393 | if(value != NULL) | ||
394 | *offset = strtod(value, &nptr) / 1000; | ||
395 | if(value == NULL || value==nptr){ | ||
396 | printf("warning: unable to read server offset response.\n"); | ||
397 | status = max_state(status, STATE_CRITICAL); | ||
398 | } else { | ||
399 | *offset_result = STATE_OK; | ||
400 | if(verbose) printf("%g\n", *offset); | ||
401 | } | ||
402 | |||
403 | if(do_jitter) { | ||
404 | value = NULL, nptr=NULL; | ||
405 | /* get the jitter */ | ||
626 | if(verbose) { | 406 | if(verbose) { |
627 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); | 407 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); |
628 | } | 408 | } |
629 | startofvalue = strchr(req.data, '='); | 409 | //*value = extract_value(req.data, strstr(getvar, "dispersion") ? "dispersion=" : "jitter="); |
630 | if(startofvalue != NULL) { | 410 | value = extract_value(req.data, "jitter"); |
631 | startofvalue++; | 411 | if(value != NULL) |
632 | jitter = strtod(startofvalue, &nptr); | 412 | *jitter = strtod(value, &nptr); |
633 | } | 413 | if(value == NULL || value==nptr){ |
634 | if(startofvalue == NULL || startofvalue==nptr){ | ||
635 | printf("warning: unable to read server jitter response.\n"); | 414 | printf("warning: unable to read server jitter response.\n"); |
636 | *status = STATE_UNKNOWN; | 415 | status = max_state(status, STATE_UNKNOWN); |
637 | } else { | 416 | } else { |
638 | if(verbose) printf("%g\n", jitter); | 417 | if(verbose) printf("%g\n", *jitter); |
639 | num_valid++; | ||
640 | rval += jitter; | ||
641 | } | 418 | } |
642 | } | 419 | } |
643 | } | ||
644 | if(verbose){ | ||
645 | printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); | ||
646 | } | ||
647 | } | ||
648 | 420 | ||
649 | rval = num_valid ? rval / num_valid : -1.0; | 421 | if(do_stratum) { |
422 | value = NULL; | ||
423 | /* get the stratum */ | ||
424 | if(verbose) { | ||
425 | printf("parsing stratum from peer %.2x: ", ntohs(peers[i].assoc)); | ||
426 | } | ||
427 | value = extract_value(req.data, "stratum"); | ||
428 | if(value == NULL){ | ||
429 | printf("warning: unable to read server stratum response.\n"); | ||
430 | status = max_state(status, STATE_UNKNOWN); | ||
431 | } else { | ||
432 | *stratum = atoi(value); | ||
433 | if(verbose) printf("%i\n", *stratum); | ||
434 | } | ||
435 | } | ||
436 | } /* if (PEER_SEL(peers[i].status) >= min_peer_sel) */ | ||
437 | } /* for (i = 0; i < npeers; i++) */ | ||
650 | 438 | ||
651 | close(conn); | 439 | close(conn); |
652 | if(peers!=NULL) free(peers); | 440 | if(peers!=NULL) free(peers); |
653 | /* If we return -1.0, it means no synchronization source was found */ | 441 | |
654 | return rval; | 442 | return status; |
655 | } | 443 | } |
656 | 444 | ||
657 | int process_arguments(int argc, char **argv){ | 445 | int process_arguments(int argc, char **argv){ |
@@ -776,11 +564,11 @@ char *perfd_stratum (int stratum) | |||
776 | } | 564 | } |
777 | 565 | ||
778 | int main(int argc, char *argv[]){ | 566 | int main(int argc, char *argv[]){ |
779 | int result, offset_result, jitter_result, stratum; | 567 | int result, offset_result, stratum; |
780 | double offset=0, jitter=0; | 568 | double offset=0, jitter=0; |
781 | char *result_line, *perfdata_line; | 569 | char *result_line, *perfdata_line; |
782 | 570 | ||
783 | result = offset_result = jitter_result = STATE_OK; | 571 | result = offset_result = STATE_OK; |
784 | 572 | ||
785 | if (process_arguments (argc, argv) == ERROR) | 573 | if (process_arguments (argc, argv) == ERROR) |
786 | usage4 (_("Could not parse arguments")); | 574 | usage4 (_("Could not parse arguments")); |
@@ -795,30 +583,14 @@ int main(int argc, char *argv[]){ | |||
795 | /* set socket timeout */ | 583 | /* set socket timeout */ |
796 | alarm (socket_timeout); | 584 | alarm (socket_timeout); |
797 | 585 | ||
798 | offset = offset_request(server_address, &stratum, &offset_result); | 586 | result = ntp_request(server_address, &offset, &offset_result, &jitter, &stratum); |
799 | if (do_offset && offset_result == STATE_UNKNOWN) { | 587 | result = get_status(fabs(offset), offset_thresholds); |
800 | result = STATE_CRITICAL; | 588 | |
801 | } else { | ||
802 | result = get_status(fabs(offset), offset_thresholds); | ||
803 | } | ||
804 | result = max_state(result, offset_result); | ||
805 | if(do_stratum) | 589 | if(do_stratum) |
806 | result = max_state(result, get_status(stratum, stratum_thresholds)); | 590 | result = max_state(result, get_status(stratum, stratum_thresholds)); |
807 | 591 | ||
808 | /* If not told to check the jitter, we don't even send packets. | 592 | if(do_jitter) |
809 | * jitter is checked using NTP control packets, which not all | ||
810 | * servers recognize. Trying to check the jitter on OpenNTPD | ||
811 | * (for example) will result in an error | ||
812 | */ | ||
813 | if(do_jitter){ | ||
814 | jitter=jitter_request(server_address, &jitter_result); | ||
815 | result = max_state(result, get_status(jitter, jitter_thresholds)); | 593 | result = max_state(result, get_status(jitter, jitter_thresholds)); |
816 | /* -1 indicates that we couldn't calculate the jitter | ||
817 | * Only overrides STATE_OK from the offset */ | ||
818 | if(jitter == -1.0 && result == STATE_OK) | ||
819 | result = STATE_UNKNOWN; | ||
820 | } | ||
821 | result = max_state(result, jitter_result); | ||
822 | 594 | ||
823 | switch (result) { | 595 | switch (result) { |
824 | case STATE_CRITICAL : | 596 | case STATE_CRITICAL : |
@@ -834,15 +606,13 @@ int main(int argc, char *argv[]){ | |||
834 | asprintf(&result_line, "NTP UNKNOWN:"); | 606 | asprintf(&result_line, "NTP UNKNOWN:"); |
835 | break; | 607 | break; |
836 | } | 608 | } |
609 | if(!syncsource_found) | ||
610 | asprintf(&result_line, "%s %s, ", result_line, _("Server not synchronized")); | ||
611 | |||
837 | if(offset_result == STATE_UNKNOWN){ | 612 | if(offset_result == STATE_UNKNOWN){ |
838 | asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); | 613 | asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); |
839 | asprintf(&perfdata_line, ""); | 614 | asprintf(&perfdata_line, ""); |
840 | } else { | 615 | } else { |
841 | #if 0 /* 2007-10-25 This can't happen. Leftovers or unimplemented? */ | ||
842 | if(offset_result==STATE_WARNING){ | ||
843 | asprintf(&result_line, "%s %s", result_line, _("Unable to fully sample sync server")); | ||
844 | } | ||
845 | #endif | ||
846 | asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); | 616 | asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); |
847 | asprintf(&perfdata_line, "%s", perfd_offset(offset)); | 617 | asprintf(&perfdata_line, "%s", perfd_offset(offset)); |
848 | } | 618 | } |
diff --git a/plugins/check_time_ntp.c b/plugins/check_time_ntp.c index df6e02f8..d23987ba 100644 --- a/plugins/check_time_ntp.c +++ b/plugins/check_time_ntp.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /****************************************************************************** | 1 | /****************************************************************************** |
2 | * | 2 | * TODO: check offset only (remove all NTP control packet stuff) |
3 | * Nagios check_ntp plugin | 3 | * Nagios check_time_ntp plugin |
4 | * | 4 | * |
5 | * License: GPL | 5 | * License: GPL |
6 | * Copyright (c) 2006 sean finney <seanius@seanius.net> | 6 | * Copyright (c) 2006 sean finney <seanius@seanius.net> |
@@ -10,7 +10,7 @@ | |||
10 | * | 10 | * |
11 | * Description: | 11 | * Description: |
12 | * | 12 | * |
13 | * This file contains the check_ntp plugin | 13 | * This file contains the check_time_ntp plugin |
14 | * | 14 | * |
15 | * This plugin to check ntp servers independent of any commandline | 15 | * This plugin to check ntp servers independent of any commandline |
16 | * programs or external libraries. | 16 | * programs or external libraries. |
diff --git a/plugins/t/check_ntpd.t b/plugins/t/check_ntpd.t new file mode 100644 index 00000000..3c5fe2ed --- /dev/null +++ b/plugins/t/check_ntpd.t | |||
@@ -0,0 +1,57 @@ | |||
1 | #! /usr/bin/perl -w -I .. | ||
2 | # | ||
3 | # Testing NTP | ||
4 | # | ||
5 | # $Id: check_ntp.t 1468 2006-08-14 08:42:23Z tonvoon $ | ||
6 | # | ||
7 | |||
8 | use strict; | ||
9 | use Test::More; | ||
10 | use NPTest; | ||
11 | |||
12 | plan tests => 4; | ||
13 | |||
14 | my $res; | ||
15 | |||
16 | my $ntp_service = getTestParameter( "NP_GOOD_NTP_SERVICE", | ||
17 | "A host providing NTP service", | ||
18 | "pool.ntp.org"); | ||
19 | |||
20 | my $no_ntp_service = getTestParameter( "NP_NO_NTP_SERVICE", | ||
21 | "A host NOT providing the NTP service", | ||
22 | "localhost" ); | ||
23 | |||
24 | my $host_nonresponsive = getTestParameter( "NP_HOST_NONRESPONSIVE", | ||
25 | "The hostname of system not responsive to network requests", | ||
26 | "10.0.0.1" ); | ||
27 | |||
28 | my $hostname_invalid = getTestParameter( "NP_HOSTNAME_INVALID", | ||
29 | "An invalid (not known to DNS) hostname", | ||
30 | "nosuchhost"); | ||
31 | |||
32 | SKIP: { | ||
33 | skip "No NTP server defined", 1 unless $ntp_service; | ||
34 | $res = NPTest->testCmd( | ||
35 | "./check_ntp -H $ntp_service" | ||
36 | ); | ||
37 | cmp_ok( $res->return_code, '==', 0, "Got good NTP result"); | ||
38 | } | ||
39 | |||
40 | SKIP: { | ||
41 | skip "No bad NTP server defined", 1 unless $no_ntp_service; | ||
42 | $res = NPTest->testCmd( | ||
43 | "./check_ntp -H $no_ntp_service" | ||
44 | ); | ||
45 | cmp_ok( $res->return_code, '==', 2, "Got bad NTP result"); | ||
46 | } | ||
47 | |||
48 | $res = NPTest->testCmd( | ||
49 | "./check_ntp -H $host_nonresponsive" | ||
50 | ); | ||
51 | cmp_ok( $res->return_code, '==', 2, "Got critical if server not responding"); | ||
52 | |||
53 | $res = NPTest->testCmd( | ||
54 | "./check_ntp -H $hostname_invalid" | ||
55 | ); | ||
56 | cmp_ok( $res->return_code, '==', 3, "Got unknown if server hostname invalid"); | ||
57 | |||
diff --git a/plugins/t/check_time_ntp.t b/plugins/t/check_time_ntp.t new file mode 100644 index 00000000..3c5fe2ed --- /dev/null +++ b/plugins/t/check_time_ntp.t | |||
@@ -0,0 +1,57 @@ | |||
1 | #! /usr/bin/perl -w -I .. | ||
2 | # | ||
3 | # Testing NTP | ||
4 | # | ||
5 | # $Id: check_ntp.t 1468 2006-08-14 08:42:23Z tonvoon $ | ||
6 | # | ||
7 | |||
8 | use strict; | ||
9 | use Test::More; | ||
10 | use NPTest; | ||
11 | |||
12 | plan tests => 4; | ||
13 | |||
14 | my $res; | ||
15 | |||
16 | my $ntp_service = getTestParameter( "NP_GOOD_NTP_SERVICE", | ||
17 | "A host providing NTP service", | ||
18 | "pool.ntp.org"); | ||
19 | |||
20 | my $no_ntp_service = getTestParameter( "NP_NO_NTP_SERVICE", | ||
21 | "A host NOT providing the NTP service", | ||
22 | "localhost" ); | ||
23 | |||
24 | my $host_nonresponsive = getTestParameter( "NP_HOST_NONRESPONSIVE", | ||
25 | "The hostname of system not responsive to network requests", | ||
26 | "10.0.0.1" ); | ||
27 | |||
28 | my $hostname_invalid = getTestParameter( "NP_HOSTNAME_INVALID", | ||
29 | "An invalid (not known to DNS) hostname", | ||
30 | "nosuchhost"); | ||
31 | |||
32 | SKIP: { | ||
33 | skip "No NTP server defined", 1 unless $ntp_service; | ||
34 | $res = NPTest->testCmd( | ||
35 | "./check_ntp -H $ntp_service" | ||
36 | ); | ||
37 | cmp_ok( $res->return_code, '==', 0, "Got good NTP result"); | ||
38 | } | ||
39 | |||
40 | SKIP: { | ||
41 | skip "No bad NTP server defined", 1 unless $no_ntp_service; | ||
42 | $res = NPTest->testCmd( | ||
43 | "./check_ntp -H $no_ntp_service" | ||
44 | ); | ||
45 | cmp_ok( $res->return_code, '==', 2, "Got bad NTP result"); | ||
46 | } | ||
47 | |||
48 | $res = NPTest->testCmd( | ||
49 | "./check_ntp -H $host_nonresponsive" | ||
50 | ); | ||
51 | cmp_ok( $res->return_code, '==', 2, "Got critical if server not responding"); | ||
52 | |||
53 | $res = NPTest->testCmd( | ||
54 | "./check_ntp -H $hostname_invalid" | ||
55 | ); | ||
56 | cmp_ok( $res->return_code, '==', 3, "Got unknown if server hostname invalid"); | ||
57 | |||