[Nagiosplug-checkins] nagiosplug/plugins check_ntp.c,1.5,1.6

M. Sean Finney seanius at users.sourceforge.net
Wed Apr 12 03:01:03 CEST 2006


Update of /cvsroot/nagiosplug/nagiosplug/plugins
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv9251

Modified Files:
	check_ntp.c 
Log Message:
the offset_requests are now parallelized.  still stuff needs to be
done (conveniently marked with XXX), but on well behaving networks
the plugin should behave more or less identical to check_ntp.pl now.


Index: check_ntp.c
===================================================================
RCS file: /cvsroot/nagiosplug/nagiosplug/plugins/check_ntp.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- check_ntp.c	12 Apr 2006 06:33:14 -0000	1.5
+++ check_ntp.c	12 Apr 2006 10:00:23 -0000	1.6
@@ -29,6 +29,7 @@
 #include "common.h"
 #include "netutils.h"
 #include "utils.h"
+#include <sys/poll.h>
 
 static char *server_address=NULL;
 static int verbose=0;
@@ -64,6 +65,13 @@
 	uint64_t txts;       /* time at which request departed server */
 } ntp_message;
 
+/* this structure holds data about results from querying offset from a peer */
+typedef struct {
+	int waiting;            /* we set to 1 to signal waiting for a response */  
+	int num_responses;      /* number of successfully recieved responses */
+	double offset[AVG_NUM]; /* offsets from each response */
+} ntp_server_results;
+
 /* this structure holds everything in an ntp control message as per rfc1305 */
 typedef struct {
 	uint8_t flags;       /* byte with leapindicator,vers,mode. see macros */
@@ -271,38 +279,20 @@
 	TVtoNTP64(t,p->txts);
 }
 
+/* do everything we need to get the total average offset
+ * - we use a certain amount of parallelization with poll() to ensure
+ *   we don't waste time sitting around waiting for single packets. 
+ * - we also "manually" handle resolving host names and connecting, because
+ *   we have to do it in a way that our lazy macros don't handle currently :( */
 double offset_request(const char *host){
-	int i=0, conn=-1;
-	ntp_message req;
-	double next_offset=0., avg_offset=0.;
-	struct timeval recv_time;
-
-	for(i=0; i<AVG_NUM; i++){
-		if(verbose) printf("offset run: %d/%d\n", i+1, AVG_NUM);
-		setup_request(&req);
-		my_udp_connect(server_address, 123, &conn);
-		write(conn, &req, sizeof(ntp_message));
-		read(conn, &req, sizeof(ntp_message));
-		gettimeofday(&recv_time, NULL);
-		/* if(verbose) print_packet(&req); */
-		close(conn);
-		next_offset=calc_offset(&req, &recv_time);
-		if(verbose) printf("offset: %g\n", next_offset);
-		avg_offset+=next_offset;
-	}
-	avg_offset/=AVG_NUM;
-	if(verbose) printf("average offset: %g\n", avg_offset);
-	return avg_offset;
-}
-
-
-/* this should behave more like ntpdate, but needs optomisations... */
-double offset_request_ntpdate(const char *host){
-	int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL;
-	ntp_message req;
-	double offset=0., avg_offset=0.;
+	int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0;
+	int servers_completed=0, one_written=0, servers_readable=0, offsets_recvd=0;
+	ntp_message *req=NULL;
+	double avg_offset=0.;
 	struct timeval recv_time;
 	struct addrinfo *ai=NULL, *ai_tmp=NULL, hints;
+	struct pollfd *ufds=NULL;
+	ntp_server_results *servers=NULL;
 
 	/* setup hints to only return results from getaddrinfo that we'd like */
 	memset(&hints, 0, sizeof(struct addrinfo));
@@ -310,24 +300,26 @@
 	hints.ai_protocol = IPPROTO_UDP;
 	hints.ai_socktype = SOCK_DGRAM;
 
-	/* XXX better error handling here... */
+	/* fill in ai with the list of hosts resolved by the host name */
 	ga_result = getaddrinfo(host, "123", &hints, &ai);
 	if(ga_result!=0){
-		fprintf(stderr, "error getting address for %s: %s\n",
-				host, gai_strerror(ga_result));
-		return -1.0;
+		die(STATE_UNKNOWN, "error getting address for %s: %s\n",
+		    host, gai_strerror(ga_result));
 	}
 
-	/* count te number of returned hosts, and allocate an array of sockets */
-	ai_tmp=ai;
-	while(ai_tmp){
-		ai_tmp = ai_tmp->ai_next;
-		num_hosts++;
-	}
+	/* count the number of returned hosts, and allocate stuff accordingly */
+	for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; }
+	req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts);
+	if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array");
 	socklist=(int*)malloc(sizeof(int)*num_hosts);
 	if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
+	ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts);
+	if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
+	servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts);
+	if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array");
+	memset(servers, 0, sizeof(ntp_server_results)*num_hosts);
 
-	/* setup each socket for writing */
+	/* setup each socket for writing, and the corresponding struct pollfd */
 	ai_tmp=ai;
 	for(i=0;ai_tmp;i++){
 		socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP);
@@ -337,37 +329,88 @@
 		}
 		if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){
 			die(STATE_UNKNOWN, "can't create socket connection");
+		} else {
+			ufds[i].fd=socklist[i];
+			ufds[i].events=POLLIN;
+			ufds[i].revents=0;
 		}
 		ai_tmp = ai_tmp->ai_next;
 	}
 
-	/* now do AVG_NUM checks to each host. this needs to be optimized
-	 * two ways:
-	 *  - use some parellization w/poll for much faster results.  currently
-	 *    we do send/recv, send/recv, etc, whereas we could use poll(), to
-	 *    determine when to read and just do a bunch of writing when we
-	 *    have free time.
-	 *  - behave like ntpdate and only take the 5 best responses.
-	 */
-	for(i=0; i<AVG_NUM; i++){
-		if(verbose) printf("offset calculation run %d/%d\n", i+1, AVG_NUM);
-		for(j=0; j<num_hosts; j++){
-			if(verbose) printf("peer %d: ", j);
-			setup_request(&req);
-			write(socklist[j], &req, sizeof(ntp_message));
-			read(socklist[j], &req, sizeof(ntp_message));
-			gettimeofday(&recv_time, NULL);
-			offset=calc_offset(&req, &recv_time);
-			if(verbose) printf("offset: %g\n", offset);
-			avg_offset+=offset;
+	/* now do AVG_NUM checks to each host. */
+	while(servers_completed<num_hosts){
+
+		/* write to any servers that are free and have done < AVG_NUM reqs */
+		/* XXX we need some kind of ability to retransmit lost packets.
+		 * XXX one way would be replace "waiting" with a timestamp and
+		 * XXX if the timestamp is old enough the request is re-transmitted.
+		 * XXX then a certain number of failures could mark a server as
+		 * XXX bad, which is what i imagine that ntpdate does though
+		 * XXX i can't confirm it (i think it still only sends a max
+		 * XXX of AVG_NUM requests, but what does it do if one fails
+		 * XXX but the others succeed? */
+		/* XXX also we need the ability to cut out failed/unresponsive
+		 * XXX servers.  currently after doing all other servers we
+		 * XXX still wait for them until the bitter end/timeout. */
+		one_written=0;
+		for(i=0; i<num_hosts; i++){
+			if(!servers[i].waiting && servers[i].num_responses<AVG_NUM){
+				if(verbose) printf("sending request to peer %d\n", i);
+				setup_request(&req[i]);
+				write(socklist[i], &req[i], sizeof(ntp_message));
+				servers[i].waiting=1;
+				one_written=1;
+				break;
+			}
 		}
-		avg_offset/=num_hosts;
+
+		/* quickly poll for any sockets with pending data */
+		servers_readable=poll(ufds, num_hosts, 100);
+		if(servers_readable==-1){
+			perror("polling ntp sockets");
+			die(STATE_UNKNOWN, "communication errors");
+		}
+
+		/* read from any sockets with pending data */
+		for(i=0; servers_readable && i<num_hosts; i++){
+			if(ufds[i].revents&POLLIN){
+				if(verbose) {
+					printf("response from peer %d: ", i);
+				}
+				read(ufds[i].fd, &req[i], sizeof(ntp_message));
+				gettimeofday(&recv_time, NULL);
+				respnum=servers[i].num_responses++;
+				servers[i].offset[respnum]=calc_offset(&req[i], &recv_time);
+				if(verbose) {
+					printf("offset %g\n", servers[i].offset[respnum]);
+				}
+				servers[i].waiting=0;
+				servers_readable--;
+				if(servers[i].num_responses==AVG_NUM) servers_completed++;
+			}
+		}
+		/* lather, rinse, repeat. */
 	}
-	avg_offset/=AVG_NUM;
-	if(verbose) printf("overall average offset: %g\n", avg_offset);
 
+	/* finally, calculate the average offset */
+	/* XXX still something about the "top 5" */
+	for(i=0;i<num_hosts;i++){
+		for(j=0;j<servers[i].num_responses;j++){
+			offsets_recvd++;
+			avg_offset+=servers[i].offset[j];
+		}
+	}
+	avg_offset/=offsets_recvd;
+
+	/* cleanup */
 	for(j=0; j<num_hosts; j++){ close(socklist[j]); }
+	free(socklist);
+	free(ufds);
+	free(servers);
+	free(req);
 	freeaddrinfo(ai);
+
+	if(verbose) printf("overall average offset: %g\n", avg_offset);
 	return avg_offset;
 }
 





More information about the Commits mailing list