diff options
-rw-r--r-- | plugins/check_ntp.c | 167 |
1 files changed, 105 insertions, 62 deletions
diff --git a/plugins/check_ntp.c b/plugins/check_ntp.c index 86709a1f..149ca98f 100644 --- a/plugins/check_ntp.c +++ b/plugins/check_ntp.c | |||
@@ -29,6 +29,7 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net"; | |||
29 | #include "common.h" | 29 | #include "common.h" |
30 | #include "netutils.h" | 30 | #include "netutils.h" |
31 | #include "utils.h" | 31 | #include "utils.h" |
32 | #include <sys/poll.h> | ||
32 | 33 | ||
33 | static char *server_address=NULL; | 34 | static char *server_address=NULL; |
34 | static int verbose=0; | 35 | static int verbose=0; |
@@ -64,6 +65,13 @@ typedef struct { | |||
64 | uint64_t txts; /* time at which request departed server */ | 65 | uint64_t txts; /* time at which request departed server */ |
65 | } ntp_message; | 66 | } ntp_message; |
66 | 67 | ||
68 | /* this structure holds data about results from querying offset from a peer */ | ||
69 | typedef struct { | ||
70 | int waiting; /* we set to 1 to signal waiting for a response */ | ||
71 | int num_responses; /* number of successfully received responses */ | ||
72 | double offset[AVG_NUM]; /* offsets from each response */ | ||
73 | } ntp_server_results; | ||
74 | |||
67 | /* this structure holds everything in an ntp control message as per rfc1305 */ | 75 | /* this structure holds everything in an ntp control message as per rfc1305 */ |
68 | typedef struct { | 76 | typedef struct { |
69 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | 77 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ |
@@ -271,38 +279,20 @@ void setup_request(ntp_message *p){ | |||
271 | TVtoNTP64(t,p->txts); | 279 | TVtoNTP64(t,p->txts); |
272 | } | 280 | } |
273 | 281 | ||
282 | /* do everything we need to get the total average offset | ||
283 | * - we use a certain amount of parallelization with poll() to ensure | ||
284 | * we don't waste time sitting around waiting for single packets. | ||
285 | * - we also "manually" handle resolving host names and connecting, because | ||
286 | * we have to do it in a way that our lazy macros don't handle currently :( */ | ||
274 | double offset_request(const char *host){ | 287 | double offset_request(const char *host){ |
275 | int i=0, conn=-1; | 288 | int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0; |
276 | ntp_message req; | 289 | int servers_completed=0, one_written=0, servers_readable=0, offsets_recvd=0; |
277 | double next_offset=0., avg_offset=0.; | 290 | ntp_message *req=NULL; |
278 | struct timeval recv_time; | 291 | double avg_offset=0.; |
279 | |||
280 | for(i=0; i<AVG_NUM; i++){ | ||
281 | if(verbose) printf("offset run: %d/%d\n", i+1, AVG_NUM); | ||
282 | setup_request(&req); | ||
283 | my_udp_connect(server_address, 123, &conn); | ||
284 | write(conn, &req, sizeof(ntp_message)); | ||
285 | read(conn, &req, sizeof(ntp_message)); | ||
286 | gettimeofday(&recv_time, NULL); | ||
287 | /* if(verbose) print_packet(&req); */ | ||
288 | close(conn); | ||
289 | next_offset=calc_offset(&req, &recv_time); | ||
290 | if(verbose) printf("offset: %g\n", next_offset); | ||
291 | avg_offset+=next_offset; | ||
292 | } | ||
293 | avg_offset/=AVG_NUM; | ||
294 | if(verbose) printf("average offset: %g\n", avg_offset); | ||
295 | return avg_offset; | ||
296 | } | ||
297 | |||
298 | |||
299 | /* this should behave more like ntpdate, but needs optomisations... */ | ||
300 | double offset_request_ntpdate(const char *host){ | ||
301 | int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL; | ||
302 | ntp_message req; | ||
303 | double offset=0., avg_offset=0.; | ||
304 | struct timeval recv_time; | 292 | struct timeval recv_time; |
305 | struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; | 293 | struct addrinfo *ai=NULL, *ai_tmp=NULL, hints; |
294 | struct pollfd *ufds=NULL; | ||
295 | ntp_server_results *servers=NULL; | ||
306 | 296 | ||
307 | /* setup hints to only return results from getaddrinfo that we'd like */ | 297 | /* setup hints to only return results from getaddrinfo that we'd like */ |
308 | memset(&hints, 0, sizeof(struct addrinfo)); | 298 | memset(&hints, 0, sizeof(struct addrinfo)); |
@@ -310,24 +300,26 @@ double offset_request_ntpdate(const char *host){ | |||
310 | hints.ai_protocol = IPPROTO_UDP; | 300 | hints.ai_protocol = IPPROTO_UDP; |
311 | hints.ai_socktype = SOCK_DGRAM; | 301 | hints.ai_socktype = SOCK_DGRAM; |
312 | 302 | ||
313 | /* XXX better error handling here... */ | 303 | /* fill in ai with the list of hosts resolved by the host name */ |
314 | ga_result = getaddrinfo(host, "123", &hints, &ai); | 304 | ga_result = getaddrinfo(host, "123", &hints, &ai); |
315 | if(ga_result!=0){ | 305 | if(ga_result!=0){ |
316 | fprintf(stderr, "error getting address for %s: %s\n", | 306 | die(STATE_UNKNOWN, "error getting address for %s: %s\n", |
317 | host, gai_strerror(ga_result)); | 307 | host, gai_strerror(ga_result)); |
318 | return -1.0; | ||
319 | } | 308 | } |
320 | 309 | ||
321 | /* count te number of returned hosts, and allocate an array of sockets */ | 310 | /* count the number of returned hosts, and allocate stuff accordingly */ |
322 | ai_tmp=ai; | 311 | for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; } |
323 | while(ai_tmp){ | 312 | req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts); |
324 | ai_tmp = ai_tmp->ai_next; | 313 | if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array"); |
325 | num_hosts++; | ||
326 | } | ||
327 | socklist=(int*)malloc(sizeof(int)*num_hosts); | 314 | socklist=(int*)malloc(sizeof(int)*num_hosts); |
328 | if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | 315 | if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); |
316 | ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts); | ||
317 | if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array"); | ||
318 | servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts); | ||
319 | if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array"); | ||
320 | memset(servers, 0, sizeof(ntp_server_results)*num_hosts); | ||
329 | 321 | ||
330 | /* setup each socket for writing */ | 322 | /* setup each socket for writing, and the corresponding struct pollfd */ |
331 | ai_tmp=ai; | 323 | ai_tmp=ai; |
332 | for(i=0;ai_tmp;i++){ | 324 | for(i=0;ai_tmp;i++){ |
333 | socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); | 325 | socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP); |
@@ -337,37 +329,88 @@ double offset_request_ntpdate(const char *host){ | |||
337 | } | 329 | } |
338 | if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ | 330 | if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){ |
339 | die(STATE_UNKNOWN, "can't create socket connection"); | 331 | die(STATE_UNKNOWN, "can't create socket connection"); |
332 | } else { | ||
333 | ufds[i].fd=socklist[i]; | ||
334 | ufds[i].events=POLLIN; | ||
335 | ufds[i].revents=0; | ||
340 | } | 336 | } |
341 | ai_tmp = ai_tmp->ai_next; | 337 | ai_tmp = ai_tmp->ai_next; |
342 | } | 338 | } |
343 | 339 | ||
344 | /* now do AVG_NUM checks to each host. this needs to be optimized | 340 | /* now do AVG_NUM checks to each host. */ |
345 | * two ways: | 341 | while(servers_completed<num_hosts){ |
346 | * - use some parellization w/poll for much faster results. currently | 342 | |
347 | * we do send/recv, send/recv, etc, whereas we could use poll(), to | 343 | /* write to any servers that are free and have done < AVG_NUM reqs */ |
348 | * determine when to read and just do a bunch of writing when we | 344 | /* XXX we need some kind of ability to retransmit lost packets. |
349 | * have free time. | 345 | * XXX one way would be replace "waiting" with a timestamp and |
350 | * - behave like ntpdate and only take the 5 best responses. | 346 | * XXX if the timestamp is old enough the request is re-transmitted. |
351 | */ | 347 | * XXX then a certain number of failures could mark a server as |
352 | for(i=0; i<AVG_NUM; i++){ | 348 | * XXX bad, which is what i imagine that ntpdate does though |
353 | if(verbose) printf("offset calculation run %d/%d\n", i+1, AVG_NUM); | 349 | * XXX i can't confirm it (i think it still only sends a max |
354 | for(j=0; j<num_hosts; j++){ | 350 | * XXX of AVG_NUM requests, but what does it do if one fails |
355 | if(verbose) printf("peer %d: ", j); | 351 | * XXX but the others succeed? */ |
356 | setup_request(&req); | 352 | /* XXX also we need the ability to cut out failed/unresponsive |
357 | write(socklist[j], &req, sizeof(ntp_message)); | 353 | * XXX servers. currently after doing all other servers we |
358 | read(socklist[j], &req, sizeof(ntp_message)); | 354 | * XXX still wait for them until the bitter end/timeout. */ |
359 | gettimeofday(&recv_time, NULL); | 355 | one_written=0; |
360 | offset=calc_offset(&req, &recv_time); | 356 | for(i=0; i<num_hosts; i++){ |
361 | if(verbose) printf("offset: %g\n", offset); | 357 | if(!servers[i].waiting && servers[i].num_responses<AVG_NUM){ |
362 | avg_offset+=offset; | 358 | if(verbose) printf("sending request to peer %d\n", i); |
359 | setup_request(&req[i]); | ||
360 | write(socklist[i], &req[i], sizeof(ntp_message)); | ||
361 | servers[i].waiting=1; | ||
362 | one_written=1; | ||
363 | break; | ||
364 | } | ||
363 | } | 365 | } |
364 | avg_offset/=num_hosts; | 366 | |
367 | /* quickly poll for any sockets with pending data */ | ||
368 | servers_readable=poll(ufds, num_hosts, 100); | ||
369 | if(servers_readable==-1){ | ||
370 | perror("polling ntp sockets"); | ||
371 | die(STATE_UNKNOWN, "communication errors"); | ||
372 | } | ||
373 | |||
374 | /* read from any sockets with pending data */ | ||
375 | for(i=0; servers_readable && i<num_hosts; i++){ | ||
376 | if(ufds[i].revents&POLLIN){ | ||
377 | if(verbose) { | ||
378 | printf("response from peer %d: ", i); | ||
379 | } | ||
380 | read(ufds[i].fd, &req[i], sizeof(ntp_message)); | ||
381 | gettimeofday(&recv_time, NULL); | ||
382 | respnum=servers[i].num_responses++; | ||
383 | servers[i].offset[respnum]=calc_offset(&req[i], &recv_time); | ||
384 | if(verbose) { | ||
385 | printf("offset %g\n", servers[i].offset[respnum]); | ||
386 | } | ||
387 | servers[i].waiting=0; | ||
388 | servers_readable--; | ||
389 | if(servers[i].num_responses==AVG_NUM) servers_completed++; | ||
390 | } | ||
391 | } | ||
392 | /* lather, rinse, repeat. */ | ||
365 | } | 393 | } |
366 | avg_offset/=AVG_NUM; | ||
367 | if(verbose) printf("overall average offset: %g\n", avg_offset); | ||
368 | 394 | ||
395 | /* finally, calculate the average offset */ | ||
396 | /* XXX still something about the "top 5" */ | ||
397 | for(i=0;i<num_hosts;i++){ | ||
398 | for(j=0;j<servers[i].num_responses;j++){ | ||
399 | offsets_recvd++; | ||
400 | avg_offset+=servers[i].offset[j]; | ||
401 | } | ||
402 | } | ||
403 | avg_offset/=offsets_recvd; | ||
404 | |||
405 | /* cleanup */ | ||
369 | for(j=0; j<num_hosts; j++){ close(socklist[j]); } | 406 | for(j=0; j<num_hosts; j++){ close(socklist[j]); } |
407 | free(socklist); | ||
408 | free(ufds); | ||
409 | free(servers); | ||
410 | free(req); | ||
370 | freeaddrinfo(ai); | 411 | freeaddrinfo(ai); |
412 | |||
413 | if(verbose) printf("overall average offset: %g\n", avg_offset); | ||
371 | return avg_offset; | 414 | return avg_offset; |
372 | } | 415 | } |
373 | 416 | ||