diff options
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/check_ntpd.c | 918 | ||||
-rw-r--r-- | plugins/check_time_ntp.c | 918 |
2 files changed, 1836 insertions, 0 deletions
diff --git a/plugins/check_ntpd.c b/plugins/check_ntpd.c new file mode 100644 index 00000000..df6e02f8 --- /dev/null +++ b/plugins/check_ntpd.c | |||
@@ -0,0 +1,918 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * Nagios check_ntp plugin | ||
4 | * | ||
5 | * License: GPL | ||
6 | * Copyright (c) 2006 sean finney <seanius@seanius.net> | ||
7 | * Copyright (c) 2007 nagios-plugins team | ||
8 | * | ||
9 | * Last Modified: $Date$ | ||
10 | * | ||
11 | * Description: | ||
12 | * | ||
13 | * This file contains the check_ntp plugin | ||
14 | * | ||
15 | * This plugin checks ntp servers independent of any commandline | ||
16 | * programs or external libraries. | ||
17 | * | ||
18 | * | ||
19 | * License Information: | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This program is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with this program; if not, write to the Free Software | ||
33 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
34 | |||
35 | $Id$ | ||
36 | |||
37 | *****************************************************************************/ | ||
38 | |||
/* Identification strings for this plugin: its name, the VCS revision
 * keyword, the copyright year and the contact mailing list. */
const char *progname  = "check_ntp";
const char *revision  = "$Revision$";
const char *copyright = "2007";
const char *email     = "nagiosplug-devel@lists.sourceforge.net";
43 | |||
44 | #include "common.h" | ||
45 | #include "netutils.h" | ||
46 | #include "utils.h" | ||
47 | |||
/* host to query; assigned from the -H/--hostname option */
static char *server_address=NULL;
/* verbosity level; bumped once for every -v on the command line */
static int verbose=0;

/* offset check: enabled flag, then warning and critical threshold strings */
static short do_offset=0;
static char *owarn="60";
static char *ocrit="120";

/* stratum check: enabled flag, then warning and critical threshold strings */
static short do_stratum=0;
static char *swarn="16";
static char *scrit="16";

/* jitter check: enabled flag, then warning and critical threshold strings */
static short do_jitter=0;
static char *jwarn="5000";
static char *jcrit="10000";
59 | |||
60 | int process_arguments (int, char **); | ||
61 | thresholds *offset_thresholds = NULL; | ||
62 | thresholds *jitter_thresholds = NULL; | ||
63 | thresholds *stratum_thresholds = NULL; | ||
64 | void print_help (void); | ||
65 | void print_usage (void); | ||
66 | |||
/* how many times each request is repeated, so the results can be averaged */
#define AVG_NUM 4

/* largest data payload, in bytes, carried by one control message */
#define MAX_CM_SIZE 468
72 | |||
/* wire layout of an ntp request/response packet (rfc1305).
 * multi-byte members are kept exactly as they appear on the wire; use the
 * conversion macros below to turn them into host values */
typedef struct {
	uint8_t flags;      /* leap indicator, version and mode packed in one byte */
	uint8_t stratum;    /* stratum of the clock */
	int8_t poll;        /* poll interval (printed as 2^poll) */
	int8_t precision;   /* local clock precision (printed as 2^precision) */
	int32_t rtdelay;    /* total round trip delay, 32-bit fixed point */
	uint32_t rtdisp;    /* max error relative to the primary source, 32-bit fixed point */
	uint32_t refid;     /* identifier of the reference clock */
	uint64_t refts;     /* reference timestamp: local time of the local clock */
	uint64_t origts;    /* when the request left the client */
	uint64_t rxts;      /* when the request reached the server */
	uint64_t txts;      /* when the reply left the server */
} ntp_message;
87 | |||
88 | /* this structure holds data about results from querying offset from a peer */ | ||
89 | typedef struct { | ||
90 | time_t waiting; /* ts set when we started waiting for a response */ | ||
91 | int num_responses; /* number of successfully recieved responses */ | ||
92 | uint8_t stratum; /* copied verbatim from the ntp_message */ | ||
93 | double rtdelay; /* converted from the ntp_message */ | ||
94 | double rtdisp; /* converted from the ntp_message */ | ||
95 | double offset[AVG_NUM]; /* offsets from each response */ | ||
96 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | ||
97 | } ntp_server_results; | ||
98 | |||
99 | /* this structure holds everything in an ntp control message as per rfc1305 */ | ||
100 | typedef struct { | ||
101 | uint8_t flags; /* byte with leapindicator,vers,mode. see macros */ | ||
102 | uint8_t op; /* R,E,M bits and Opcode */ | ||
103 | uint16_t seq; /* Packet sequence */ | ||
104 | uint16_t status; /* Clock status */ | ||
105 | uint16_t assoc; /* Association */ | ||
106 | uint16_t offset; /* Similar to TCP sequence # */ | ||
107 | uint16_t count; /* # bytes of data */ | ||
108 | char data[MAX_CM_SIZE]; /* ASCII data of the request */ | ||
109 | /* NB: not necessarily NULL terminated! */ | ||
110 | } ntp_control_message; | ||
111 | |||
/* one entry of a control response's data section: an association id
 * together with the status word that belongs to it */
typedef struct {
	uint16_t assoc;   /* association id */
	uint16_t status;  /* status word of that association */
} ntp_assoc_status_pair;
117 | |||
/* Accessors for the packed flag/opcode bytes of ntp packets.
 * Every macro argument is parenthesized so that compound expressions such
 * as VN(a|b) are masked as a whole rather than only their last operand.
 * The *_SET macros OR bits in; they expect the target bits to be zero. */

/* bits 1,2 are the leap indicator */
#define LI_MASK 0xc0
#define LI(x) (((x)&LI_MASK)>>6)
#define LI_SET(x,y) do{ (x) |= (((y)<<6)&LI_MASK); }while(0)
/* and these are the values of the leap indicator */
#define LI_NOWARNING 0x00
#define LI_EXTRASEC 0x01
#define LI_MISSINGSEC 0x02
#define LI_ALARM 0x03
/* bits 3,4,5 are the ntp version */
#define VN_MASK 0x38
#define VN(x) (((x)&VN_MASK)>>3)
#define VN_SET(x,y) do{ (x) |= (((y)<<3)&VN_MASK); }while(0)
#define VN_RESERVED 0x02
/* bits 6,7,8 are the ntp mode */
#define MODE_MASK 0x07
#define MODE(x) ((x)&MODE_MASK)
#define MODE_SET(x,y) do{ (x) |= ((y)&MODE_MASK); }while(0)
/* here are some values */
#define MODE_CLIENT 0x03
#define MODE_CONTROLMSG 0x06
/* In control message, bits 8-10 are R,E,M bits */
#define REM_MASK 0xe0
#define REM_RESP 0x80
#define REM_ERROR 0x40
#define REM_MORE 0x20
/* In control message, bits 11 - 15 are opcode */
#define OP_MASK 0x1f
#define OP_SET(x,y) do{ (x) |= ((y)&OP_MASK); }while(0)
#define OP_READSTAT 0x01
#define OP_READVAR 0x02
/* In peer status bytes, bits 6,7,8 determine clock selection status.
 * the argument is a status word still in network byte order */
#define PEER_SEL(x) ((ntohs(x)>>8)&0x07)
#define PEER_INCLUDED 0x04
#define PEER_SYNCSOURCE 0x06
153 | |||
154 | /** | ||
155 | ** a note about the 32-bit "fixed point" numbers: | ||
156 | ** | ||
157 | they are divided into halves, each being a 16-bit int in network byte order: | ||
158 | - the first 16 bits are an int on the left side of a decimal point. | ||
159 | - the second 16 bits represent a fraction n/(2^16) | ||
160 | likewise for the 64-bit "fixed point" numbers with everything doubled :) | ||
161 | **/ | ||
162 | |||
/* macros to access the left/right 16 bits of a 32-bit ntp "fixed point"
   number. note that these can be used as lvalues too.
   the argument must be an lvalue; it is parenthesized so that the address
   is taken of the whole argument expression.
   NOTE(review): these reinterpret the object through a differently typed
   pointer, which relies on the compiler tolerating such aliasing */
#define L16(x) (((uint16_t*)&(x))[0])
#define R16(x) (((uint16_t*)&(x))[1])
/* macros to access the left/right 32 bits of a 64-bit ntp "fixed point"
   number. these too can be used as lvalues */
#define L32(x) (((uint32_t*)&(x))[0])
#define R32(x) (((uint32_t*)&(x))[1])
171 | |||
/* ntp timestamps count seconds from 1900-01-01, the unix epoch starts at
 * 1970-01-01. this is the difference between the two, in seconds
 * (2208988800) */
#define EPOCHDIFF 0x83aa7e80UL

/* extract a 32-bit ntp fixed point number (network byte order) into a
 * double: integer half plus fractional half / 2^16 */
#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)

/* likewise for a 64-bit ntp fp number, additionally shifted to the unix
 * epoch. an all-zero timestamp is passed through as 0 */
#define NTP64asDOUBLE(n) ((double)(((uint64_t)(n))?\
	(ntohl(L32(n))-EPOCHDIFF) + \
	(.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
	0))

/* convert a struct timeval to a double (seconds) */
#define TVasDOUBLE(x) ((double)((x).tv_sec+(0.000001*(x).tv_usec)))

/* convert an ntp 64-bit fp number n to a struct timeval t.
 * an all-zero timestamp gives a zeroed timeval */
#define NTP64toTV(n,t) \
	do{ if(!(n)) (t).tv_sec = (t).tv_usec = 0; \
	    else { \
	        (t).tv_sec=ntohl(L32(n))-EPOCHDIFF; \
	        (t).tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \
	    } \
	}while(0)

/* convert a struct timeval t to an ntp 64-bit fp number n (network byte
 * order). a zeroed timeval gives an all-zero timestamp */
#define TVtoNTP64(t,n) \
	do{ if(!(t).tv_usec && !(t).tv_sec) (n)=0x0UL; \
	    else { \
	        L32(n)=htonl((t).tv_sec + EPOCHDIFF); \
	        R32(n)=htonl((uint64_t)((4294.967296*(t).tv_usec)+.5)); \
	    } \
	} while(0)
204 | |||
/* NTP control message header is 12 bytes, plus any data in the data
 * field, plus null padding to the nearest 32-bit boundary per rfc.
 * m.count is expected in network byte order. padding is only added when
 * the data length is not already a multiple of 4, so a full message
 * (count == MAX_CM_SIZE) is exactly sizeof(ntp_control_message) bytes.
 */
#define SIZEOF_NTPCM(m) (12+ntohs((m).count)+((ntohs((m).count)%4)?4-(ntohs((m).count)%4):0))
209 | |||
/* finally, a little helper or two for debugging: */

/* run statement x only when verbose is greater than 1.
 * NB: the expansion ends in ';' and code in this file calls DBG(...)
 * without a trailing semicolon, so the ';' has to stay. do not use DBG as
 * the unbraced body of an if that has an else */
#define DBG(x) do{if(verbose>1){ x; }}while(0);
/* print a 32-bit value as a dotted quad, most significant byte first.
 * the argument is parenthesized so that expressions are shifted as a whole */
#define PRINTSOCKADDR(x) \
	do{ \
		printf("%u.%u.%u.%u", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff);\
	}while(0);
216 | |||
217 | /* calculate the offset of the local clock */ | ||
218 | static inline double calc_offset(const ntp_message *m, const struct timeval *t){ | ||
219 | double client_tx, peer_rx, peer_tx, client_rx; | ||
220 | client_tx = NTP64asDOUBLE(m->origts); | ||
221 | peer_rx = NTP64asDOUBLE(m->rxts); | ||
222 | peer_tx = NTP64asDOUBLE(m->txts); | ||
223 | client_rx=TVasDOUBLE((*t)); | ||
224 | return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); | ||
225 | } | ||
226 | |||
227 | /* print out a ntp packet in human readable/debuggable format */ | ||
228 | void print_ntp_message(const ntp_message *p){ | ||
229 | struct timeval ref, orig, rx, tx; | ||
230 | |||
231 | NTP64toTV(p->refts,ref); | ||
232 | NTP64toTV(p->origts,orig); | ||
233 | NTP64toTV(p->rxts,rx); | ||
234 | NTP64toTV(p->txts,tx); | ||
235 | |||
236 | printf("packet contents:\n"); | ||
237 | printf("\tflags: 0x%.2x\n", p->flags); | ||
238 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
239 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
240 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
241 | printf("\tstratum = %d\n", p->stratum); | ||
242 | printf("\tpoll = %g\n", pow(2, p->poll)); | ||
243 | printf("\tprecision = %g\n", pow(2, p->precision)); | ||
244 | printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay)); | ||
245 | printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp)); | ||
246 | printf("\trefid = %x\n", p->refid); | ||
247 | printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts)); | ||
248 | printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts)); | ||
249 | printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts)); | ||
250 | printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); | ||
251 | } | ||
252 | |||
253 | void print_ntp_control_message(const ntp_control_message *p){ | ||
254 | int i=0, numpeers=0; | ||
255 | const ntp_assoc_status_pair *peer=NULL; | ||
256 | |||
257 | printf("control packet contents:\n"); | ||
258 | printf("\tflags: 0x%.2x , 0x%.2x\n", p->flags, p->op); | ||
259 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
260 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
261 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
262 | printf("\t response=%d (0x%.2x)\n", (p->op&REM_RESP)>0, p->op&REM_RESP); | ||
263 | printf("\t more=%d (0x%.2x)\n", (p->op&REM_MORE)>0, p->op&REM_MORE); | ||
264 | printf("\t error=%d (0x%.2x)\n", (p->op&REM_ERROR)>0, p->op&REM_ERROR); | ||
265 | printf("\t op=%d (0x%.2x)\n", p->op&OP_MASK, p->op&OP_MASK); | ||
266 | printf("\tsequence: %d (0x%.2x)\n", ntohs(p->seq), ntohs(p->seq)); | ||
267 | printf("\tstatus: %d (0x%.2x)\n", ntohs(p->status), ntohs(p->status)); | ||
268 | printf("\tassoc: %d (0x%.2x)\n", ntohs(p->assoc), ntohs(p->assoc)); | ||
269 | printf("\toffset: %d (0x%.2x)\n", ntohs(p->offset), ntohs(p->offset)); | ||
270 | printf("\tcount: %d (0x%.2x)\n", ntohs(p->count), ntohs(p->count)); | ||
271 | numpeers=ntohs(p->count)/(sizeof(ntp_assoc_status_pair)); | ||
272 | if(p->op&REM_RESP && p->op&OP_READSTAT){ | ||
273 | peer=(ntp_assoc_status_pair*)p->data; | ||
274 | for(i=0;i<numpeers;i++){ | ||
275 | printf("\tpeer id %.2x status %.2x", | ||
276 | ntohs(peer[i].assoc), ntohs(peer[i].status)); | ||
277 | if (PEER_SEL(peer[i].status) >= PEER_INCLUDED){ | ||
278 | if(PEER_SEL(peer[i].status) >= PEER_SYNCSOURCE){ | ||
279 | printf(" <-- current sync source"); | ||
280 | } else { | ||
281 | printf(" <-- current sync candidate"); | ||
282 | } | ||
283 | } | ||
284 | printf("\n"); | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | |||
289 | void setup_request(ntp_message *p){ | ||
290 | struct timeval t; | ||
291 | |||
292 | memset(p, 0, sizeof(ntp_message)); | ||
293 | LI_SET(p->flags, LI_ALARM); | ||
294 | VN_SET(p->flags, 4); | ||
295 | MODE_SET(p->flags, MODE_CLIENT); | ||
296 | p->poll=4; | ||
297 | p->precision=(int8_t)0xfa; | ||
298 | L16(p->rtdelay)=htons(1); | ||
299 | L16(p->rtdisp)=htons(1); | ||
300 | |||
301 | gettimeofday(&t, NULL); | ||
302 | TVtoNTP64(t,p->txts); | ||
303 | } | ||
304 | |||
305 | /* select the "best" server from a list of servers, and return its index. | ||
306 | * this is done by filtering servers based on stratum, dispersion, and | ||
307 | * finally round-trip delay. */ | ||
308 | int best_offset_server(const ntp_server_results *slist, int nservers){ | ||
309 | int i=0, j=0, cserver=0, candidates[5], csize=0; | ||
310 | |||
311 | /* for each server */ | ||
312 | for(cserver=0; cserver<nservers; cserver++){ | ||
313 | /* sort out servers with error flags */ | ||
314 | if ( LI(slist[cserver].flags) != LI_NOWARNING ){ | ||
315 | if (verbose) printf("discarding peer id %d: flags=%d\n", cserver, LI(slist[cserver].flags)); | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | /* compare it to each of the servers already in the candidate list */ | ||
320 | for(i=0; i<csize; i++){ | ||
321 | /* does it have an equal or better stratum? */ | ||
322 | if(slist[cserver].stratum <= slist[i].stratum){ | ||
323 | /* does it have an equal or better dispersion? */ | ||
324 | if(slist[cserver].rtdisp <= slist[i].rtdisp){ | ||
325 | /* does it have a better rtdelay? */ | ||
326 | if(slist[cserver].rtdelay < slist[i].rtdelay){ | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | |||
333 | /* if we haven't reached the current list's end, move everyone | ||
334 | * over one to the right, and insert the new candidate */ | ||
335 | if(i<csize){ | ||
336 | for(j=5; j>i; j--){ | ||
337 | candidates[j]=candidates[j-1]; | ||
338 | } | ||
339 | } | ||
340 | /* regardless, if they should be on the list... */ | ||
341 | if(i<5) { | ||
342 | candidates[i]=cserver; | ||
343 | if(csize<5) csize++; | ||
344 | /* otherwise discard the server */ | ||
345 | } else { | ||
346 | DBG(printf("discarding peer id %d\n", cserver)); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | if(csize>0) { | ||
351 | DBG(printf("best server selected: peer %d\n", candidates[0])); | ||
352 | return candidates[0]; | ||
353 | } else { | ||
354 | DBG(printf("no peers meeting synchronization criteria :(\n")); | ||
355 | return -1; | ||
356 | } | ||
357 | } | ||
358 | |||
/* do everything we need to get the total average offset
 * - we use a certain amount of parallelization with poll() to ensure
 *   we don't waste time sitting around waiting for single packets.
 * - we also "manually" handle resolving host names and connecting, because
 *   we have to do it in a way that our lazy macros don't handle currently :(
 *
 *   host:    name or address to resolve; every resolved address is queried
 *   stratum: receives the stratum of the selected server
 *   status:  set to STATE_UNKNOWN when no usable server could be selected
 * returns the average of the offsets measured against the selected server
 * (0 when none could be selected). exits through die() on resolver, socket
 * or allocation errors and when no server answered at all. */
double offset_request(const char *host, int *stratum, int *status){
	int i=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0;
	int servers_completed=0, one_written=0, one_read=0, servers_readable=0, best_index=-1;
	time_t now_time=0, start_ts=0;
	ssize_t nread=0;
	ntp_message *req=NULL;
	double avg_offset=0.;
	struct timeval recv_time;
	struct addrinfo *ai=NULL, *ai_tmp=NULL, hints;
	struct pollfd *ufds=NULL;
	ntp_server_results *servers=NULL;

	/* setup hints to only return results from getaddrinfo that we'd like */
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = address_family;
	hints.ai_protocol = IPPROTO_UDP;
	hints.ai_socktype = SOCK_DGRAM;

	/* fill in ai with the list of hosts resolved by the host name */
	ga_result = getaddrinfo(host, "123", &hints, &ai);
	if(ga_result!=0){
		die(STATE_UNKNOWN, "error getting address for %s: %s\n",
		    host, gai_strerror(ga_result));
	}

	/* count the number of returned hosts, and allocate stuff accordingly */
	for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; }
	req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts);
	if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array");
	socklist=(int*)malloc(sizeof(int)*num_hosts);
	if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
	ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts);
	if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
	servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts);
	if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array");
	memset(servers, 0, sizeof(ntp_server_results)*num_hosts);

	/* setup each socket for writing, and the corresponding struct pollfd */
	ai_tmp=ai;
	for(i=0;ai_tmp;i++){
		socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP);
		if(socklist[i] == -1) {
			perror(NULL);
			die(STATE_UNKNOWN, "can not create new socket");
		}
		if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){
			die(STATE_UNKNOWN, "can't create socket connection");
		} else {
			ufds[i].fd=socklist[i];
			ufds[i].events=POLLIN;
			ufds[i].revents=0;
		}
		ai_tmp = ai_tmp->ai_next;
	}

	/* now do AVG_NUM checks to each host.  we stop before timeout/2 seconds
	 * have passed in order to ensure post-processing and jitter time. */
	now_time=start_ts=time(NULL);
	while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){
		/* loop through each server and find each one which hasn't
		 * been touched in the past second or so and is still lacking
		 * some responses.  for each of these servers, send a new request,
		 * and update the "waiting" timestamp with the current time. */
		one_written=0;
		now_time=time(NULL);

		for(i=0; i<num_hosts; i++){
			if(servers[i].waiting<now_time && servers[i].num_responses<AVG_NUM){
				if(verbose && servers[i].waiting != 0) printf("re-");
				if(verbose) printf("sending request to peer %d\n", i);
				setup_request(&req[i]);
				/* a failed send is not fatal: the peer is simply
				 * retried once its "waiting" stamp has aged */
				if(write(socklist[i], &req[i], sizeof(ntp_message)) != (ssize_t)sizeof(ntp_message)){
					if(verbose) printf("failed to send request to peer %d\n", i);
				}
				servers[i].waiting=now_time;
				one_written=1;
				/* only one request is sent per pass */
				break;
			}
		}

		/* quickly poll for any sockets with pending data */
		servers_readable=poll(ufds, num_hosts, 100);
		if(servers_readable==-1){
			perror("polling ntp sockets");
			die(STATE_UNKNOWN, "communication errors");
		}

		/* read from any sockets with pending data */
		for(i=0; servers_readable && i<num_hosts; i++){
			if(ufds[i].revents&POLLIN && servers[i].num_responses < AVG_NUM){
				nread=read(ufds[i].fd, &req[i], sizeof(ntp_message));
				gettimeofday(&recv_time, NULL);
				servers_readable--;

				/* on an error or a truncated datagram req[i] still
				 * holds (part of) our own request: do not count it */
				if(nread < (ssize_t)sizeof(ntp_message)){
					if(verbose) printf("discarding failed or short read from peer %d\n", i);
					continue;
				}

				if(verbose) {
					printf("response from peer %d: ", i);
				}
				DBG(print_ntp_message(&req[i]));
				respnum=servers[i].num_responses++;
				servers[i].offset[respnum]=calc_offset(&req[i], &recv_time);
				if(verbose) {
					printf("offset %.10g, stratum %i\n", servers[i].offset[respnum], req[i].stratum);
				}
				servers[i].stratum=req[i].stratum;
				servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp);
				servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay);
				servers[i].waiting=0;
				servers[i].flags=req[i].flags;
				one_read = 1;
				if(servers[i].num_responses==AVG_NUM) servers_completed++;
			}
		}
		/* lather, rinse, repeat. */
	}

	if (one_read == 0) {
		die(STATE_CRITICAL, "NTP CRITICAL: No response from NTP server\n");
	}

	/* now, pick the best server from the list */
	best_index=best_offset_server(servers, num_hosts);
	if(best_index < 0 || servers[best_index].num_responses == 0){
		/* nothing usable (or nothing to average): avoid dividing by 0 */
		*status=STATE_UNKNOWN;
	} else {
		/* finally, calculate the average offset over every response
		 * received from the selected server */
		for(i=0; i<servers[best_index].num_responses;i++){
			avg_offset+=servers[best_index].offset[i];
		}
		avg_offset/=servers[best_index].num_responses;
		*stratum = servers[best_index].stratum;
	}

	/* cleanup */
	/* FIXME: Not closing the socket to avoid re-use of the local port
	 * which can cause old NTP packets to be read instead of NTP control
	 * pactets in jitter_request(). THERE MUST BE ANOTHER WAY...
	 * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */
	free(socklist);
	free(ufds);
	free(servers);
	free(req);
	freeaddrinfo(ai);

	if(verbose) printf("overall average offset: %.10g\n", avg_offset);
	return avg_offset;
}
507 | |||
508 | void | ||
509 | setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ | ||
510 | memset(p, 0, sizeof(ntp_control_message)); | ||
511 | LI_SET(p->flags, LI_NOWARNING); | ||
512 | VN_SET(p->flags, VN_RESERVED); | ||
513 | MODE_SET(p->flags, MODE_CONTROLMSG); | ||
514 | OP_SET(p->op, opcode); | ||
515 | p->seq = htons(seq); | ||
516 | /* Remaining fields are zero for requests */ | ||
517 | } | ||
518 | |||
519 | /* XXX handle responses with the error bit set */ | ||
520 | double jitter_request(const char *host, int *status){ | ||
521 | int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; | ||
522 | int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; | ||
523 | int peers_size=0, peer_offset=0; | ||
524 | ntp_assoc_status_pair *peers=NULL; | ||
525 | ntp_control_message req; | ||
526 | const char *getvar = "jitter"; | ||
527 | double rval = 0.0, jitter = -1.0; | ||
528 | char *startofvalue=NULL, *nptr=NULL; | ||
529 | void *tmp; | ||
530 | |||
531 | /* Long-winded explanation: | ||
532 | * Getting the jitter requires a number of steps: | ||
533 | * 1) Send a READSTAT request. | ||
534 | * 2) Interpret the READSTAT reply | ||
535 | * a) The data section contains a list of peer identifiers (16 bits) | ||
536 | * and associated status words (16 bits) | ||
537 | * b) We want the value of 0x06 in the SEL (peer selection) value, | ||
538 | * which means "current synchronizatin source". If that's missing, | ||
539 | * we take anything better than 0x04 (see the rfc for details) but | ||
540 | * set a minimum of warning. | ||
541 | * 3) Send a READVAR request for information on each peer identified | ||
542 | * in 2b greater than the minimum selection value. | ||
543 | * 4) Extract the jitter value from the data[] (it's ASCII) | ||
544 | */ | ||
545 | my_udp_connect(server_address, 123, &conn); | ||
546 | |||
547 | /* keep sending requests until the server stops setting the | ||
548 | * REM_MORE bit, though usually this is only 1 packet. */ | ||
549 | do{ | ||
550 | setup_control_request(&req, OP_READSTAT, 1); | ||
551 | DBG(printf("sending READSTAT request")); | ||
552 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
553 | DBG(print_ntp_control_message(&req)); | ||
554 | /* Attempt to read the largest size packet possible */ | ||
555 | req.count=htons(MAX_CM_SIZE); | ||
556 | DBG(printf("recieving READSTAT response")) | ||
557 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
558 | DBG(print_ntp_control_message(&req)); | ||
559 | /* Each peer identifier is 4 bytes in the data section, which | ||
560 | * we represent as a ntp_assoc_status_pair datatype. | ||
561 | */ | ||
562 | peers_size+=ntohs(req.count); | ||
563 | if((tmp=realloc(peers, peers_size)) == NULL) | ||
564 | free(peers), die(STATE_UNKNOWN, "can not (re)allocate 'peers' buffer\n"); | ||
565 | peers=tmp; | ||
566 | memcpy((void*)((ptrdiff_t)peers+peer_offset), (void*)req.data, ntohs(req.count)); | ||
567 | npeers=peers_size/sizeof(ntp_assoc_status_pair); | ||
568 | peer_offset+=ntohs(req.count); | ||
569 | } while(req.op&REM_MORE); | ||
570 | |||
571 | /* first, let's find out if we have a sync source, or if there are | ||
572 | * at least some candidates. in the case of the latter we'll issue | ||
573 | * a warning but go ahead with the check on them. */ | ||
574 | for (i = 0; i < npeers; i++){ | ||
575 | if (PEER_SEL(peers[i].status) >= PEER_INCLUDED){ | ||
576 | num_candidates++; | ||
577 | if(PEER_SEL(peers[i].status) >= PEER_SYNCSOURCE){ | ||
578 | syncsource_found=1; | ||
579 | min_peer_sel=PEER_SYNCSOURCE; | ||
580 | } | ||
581 | } | ||
582 | } | ||
583 | if(verbose) printf("%d candiate peers available\n", num_candidates); | ||
584 | if(verbose && syncsource_found) printf("synchronization source found\n"); | ||
585 | if(! syncsource_found){ | ||
586 | *status = STATE_UNKNOWN; | ||
587 | if(verbose) printf("warning: no synchronization source found\n"); | ||
588 | } | ||
589 | |||
590 | |||
591 | for (run=0; run<AVG_NUM; run++){ | ||
592 | if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); | ||
593 | for (i = 0; i < npeers; i++){ | ||
594 | /* Only query this server if it is the current sync source */ | ||
595 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ | ||
596 | num_selected++; | ||
597 | setup_control_request(&req, OP_READVAR, 2); | ||
598 | req.assoc = peers[i].assoc; | ||
599 | /* By spec, putting the variable name "jitter" in the request | ||
600 | * should cause the server to provide _only_ the jitter value. | ||
601 | * thus reducing net traffic, guaranteeing us only a single | ||
602 | * datagram in reply, and making intepretation much simpler | ||
603 | */ | ||
604 | /* Older servers doesn't know what jitter is, so if we get an | ||
605 | * error on the first pass we redo it with "dispersion" */ | ||
606 | strncpy(req.data, getvar, MAX_CM_SIZE-1); | ||
607 | req.count = htons(strlen(getvar)); | ||
608 | DBG(printf("sending READVAR request...\n")); | ||
609 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
610 | DBG(print_ntp_control_message(&req)); | ||
611 | |||
612 | req.count = htons(MAX_CM_SIZE); | ||
613 | DBG(printf("recieving READVAR response...\n")); | ||
614 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
615 | DBG(print_ntp_control_message(&req)); | ||
616 | |||
617 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { | ||
618 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | ||
619 | getvar = "dispersion"; | ||
620 | num_selected--; | ||
621 | i--; | ||
622 | continue; | ||
623 | } | ||
624 | |||
625 | /* get to the float value */ | ||
626 | if(verbose) { | ||
627 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); | ||
628 | } | ||
629 | startofvalue = strchr(req.data, '='); | ||
630 | if(startofvalue != NULL) { | ||
631 | startofvalue++; | ||
632 | jitter = strtod(startofvalue, &nptr); | ||
633 | } | ||
634 | if(startofvalue == NULL || startofvalue==nptr){ | ||
635 | printf("warning: unable to read server jitter response.\n"); | ||
636 | *status = STATE_UNKNOWN; | ||
637 | } else { | ||
638 | if(verbose) printf("%g\n", jitter); | ||
639 | num_valid++; | ||
640 | rval += jitter; | ||
641 | } | ||
642 | } | ||
643 | } | ||
644 | if(verbose){ | ||
645 | printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); | ||
646 | } | ||
647 | } | ||
648 | |||
649 | rval = num_valid ? rval / num_valid : -1.0; | ||
650 | |||
651 | close(conn); | ||
652 | if(peers!=NULL) free(peers); | ||
653 | /* If we return -1.0, it means no synchronization source was found */ | ||
654 | return rval; | ||
655 | } | ||
656 | |||
657 | int process_arguments(int argc, char **argv){ | ||
658 | int c; | ||
659 | int option=0; | ||
660 | static struct option longopts[] = { | ||
661 | {"version", no_argument, 0, 'V'}, | ||
662 | {"help", no_argument, 0, 'h'}, | ||
663 | {"verbose", no_argument, 0, 'v'}, | ||
664 | {"use-ipv4", no_argument, 0, '4'}, | ||
665 | {"use-ipv6", no_argument, 0, '6'}, | ||
666 | {"warning", required_argument, 0, 'w'}, | ||
667 | {"critical", required_argument, 0, 'c'}, | ||
668 | {"swarn", required_argument, 0, 'W'}, | ||
669 | {"scrit", required_argument, 0, 'C'}, | ||
670 | {"jwarn", required_argument, 0, 'j'}, | ||
671 | {"jcrit", required_argument, 0, 'k'}, | ||
672 | {"timeout", required_argument, 0, 't'}, | ||
673 | {"hostname", required_argument, 0, 'H'}, | ||
674 | {0, 0, 0, 0} | ||
675 | }; | ||
676 | |||
677 | |||
678 | if (argc < 2) | ||
679 | usage ("\n"); | ||
680 | |||
681 | while (1) { | ||
682 | c = getopt_long (argc, argv, "Vhv46w:c:W:C:j:k:t:H:", longopts, &option); | ||
683 | if (c == -1 || c == EOF || c == 1) | ||
684 | break; | ||
685 | |||
686 | switch (c) { | ||
687 | case 'h': | ||
688 | print_help(); | ||
689 | exit(STATE_OK); | ||
690 | break; | ||
691 | case 'V': | ||
692 | print_revision(progname, revision); | ||
693 | exit(STATE_OK); | ||
694 | break; | ||
695 | case 'v': | ||
696 | verbose++; | ||
697 | break; | ||
698 | case 'w': | ||
699 | do_offset=1; | ||
700 | owarn = optarg; | ||
701 | break; | ||
702 | case 'c': | ||
703 | do_offset=1; | ||
704 | ocrit = optarg; | ||
705 | break; | ||
706 | case 'W': | ||
707 | do_stratum=1; | ||
708 | swarn = optarg; | ||
709 | break; | ||
710 | case 'C': | ||
711 | do_stratum=1; | ||
712 | scrit = optarg; | ||
713 | break; | ||
714 | case 'j': | ||
715 | do_jitter=1; | ||
716 | jwarn = optarg; | ||
717 | break; | ||
718 | case 'k': | ||
719 | do_jitter=1; | ||
720 | jcrit = optarg; | ||
721 | break; | ||
722 | case 'H': | ||
723 | if(is_host(optarg) == FALSE) | ||
724 | usage2(_("Invalid hostname/address"), optarg); | ||
725 | server_address = strdup(optarg); | ||
726 | break; | ||
727 | case 't': | ||
728 | socket_timeout=atoi(optarg); | ||
729 | break; | ||
730 | case '4': | ||
731 | address_family = AF_INET; | ||
732 | break; | ||
733 | case '6': | ||
734 | #ifdef USE_IPV6 | ||
735 | address_family = AF_INET6; | ||
736 | #else | ||
737 | usage4 (_("IPv6 support not available")); | ||
738 | #endif | ||
739 | break; | ||
740 | case '?': | ||
741 | /* print short usage statement if args not parsable */ | ||
742 | usage5 (); | ||
743 | break; | ||
744 | } | ||
745 | } | ||
746 | |||
747 | if(server_address == NULL){ | ||
748 | usage4(_("Hostname was not supplied")); | ||
749 | } | ||
750 | |||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | char *perfd_offset (double offset) | ||
755 | { | ||
756 | return fperfdata ("offset", offset, "s", | ||
757 | TRUE, offset_thresholds->warning->end, | ||
758 | TRUE, offset_thresholds->critical->end, | ||
759 | FALSE, 0, FALSE, 0); | ||
760 | } | ||
761 | |||
762 | char *perfd_jitter (double jitter) | ||
763 | { | ||
764 | return fperfdata ("jitter", jitter, "s", | ||
765 | do_jitter, jitter_thresholds->warning->end, | ||
766 | do_jitter, jitter_thresholds->critical->end, | ||
767 | TRUE, 0, FALSE, 0); | ||
768 | } | ||
769 | |||
770 | char *perfd_stratum (int stratum) | ||
771 | { | ||
772 | return perfdata ("stratum", stratum, "", | ||
773 | do_stratum, (int)stratum_thresholds->warning->end, | ||
774 | do_stratum, (int)stratum_thresholds->critical->end, | ||
775 | TRUE, 0, TRUE, 16); | ||
776 | } | ||
777 | |||
778 | int main(int argc, char *argv[]){ | ||
779 | int result, offset_result, jitter_result, stratum; | ||
780 | double offset=0, jitter=0; | ||
781 | char *result_line, *perfdata_line; | ||
782 | |||
783 | result = offset_result = jitter_result = STATE_OK; | ||
784 | |||
785 | if (process_arguments (argc, argv) == ERROR) | ||
786 | usage4 (_("Could not parse arguments")); | ||
787 | |||
788 | set_thresholds(&offset_thresholds, owarn, ocrit); | ||
789 | set_thresholds(&jitter_thresholds, jwarn, jcrit); | ||
790 | set_thresholds(&stratum_thresholds, swarn, scrit); | ||
791 | |||
792 | /* initialize alarm signal handling */ | ||
793 | signal (SIGALRM, socket_timeout_alarm_handler); | ||
794 | |||
795 | /* set socket timeout */ | ||
796 | alarm (socket_timeout); | ||
797 | |||
798 | offset = offset_request(server_address, &stratum, &offset_result); | ||
799 | if (do_offset && offset_result == STATE_UNKNOWN) { | ||
800 | result = STATE_CRITICAL; | ||
801 | } else { | ||
802 | result = get_status(fabs(offset), offset_thresholds); | ||
803 | } | ||
804 | result = max_state(result, offset_result); | ||
805 | if(do_stratum) | ||
806 | result = max_state(result, get_status(stratum, stratum_thresholds)); | ||
807 | |||
808 | /* If not told to check the jitter, we don't even send packets. | ||
809 | * jitter is checked using NTP control packets, which not all | ||
810 | * servers recognize. Trying to check the jitter on OpenNTPD | ||
811 | * (for example) will result in an error | ||
812 | */ | ||
813 | if(do_jitter){ | ||
814 | jitter=jitter_request(server_address, &jitter_result); | ||
815 | result = max_state(result, get_status(jitter, jitter_thresholds)); | ||
816 | /* -1 indicates that we couldn't calculate the jitter | ||
817 | * Only overrides STATE_OK from the offset */ | ||
818 | if(jitter == -1.0 && result == STATE_OK) | ||
819 | result = STATE_UNKNOWN; | ||
820 | } | ||
821 | result = max_state(result, jitter_result); | ||
822 | |||
823 | switch (result) { | ||
824 | case STATE_CRITICAL : | ||
825 | asprintf(&result_line, "NTP CRITICAL:"); | ||
826 | break; | ||
827 | case STATE_WARNING : | ||
828 | asprintf(&result_line, "NTP WARNING:"); | ||
829 | break; | ||
830 | case STATE_OK : | ||
831 | asprintf(&result_line, "NTP OK:"); | ||
832 | break; | ||
833 | default : | ||
834 | asprintf(&result_line, "NTP UNKNOWN:"); | ||
835 | break; | ||
836 | } | ||
837 | if(offset_result == STATE_UNKNOWN){ | ||
838 | asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); | ||
839 | asprintf(&perfdata_line, ""); | ||
840 | } else { | ||
841 | #if 0 /* 2007-10-25 This can't happen. Leftovers or uninplemented? */ | ||
842 | if(offset_result==STATE_WARNING){ | ||
843 | asprintf(&result_line, "%s %s", result_line, _("Unable to fully sample sync server")); | ||
844 | } | ||
845 | #endif | ||
846 | asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); | ||
847 | asprintf(&perfdata_line, "%s", perfd_offset(offset)); | ||
848 | } | ||
849 | if (do_jitter) { | ||
850 | asprintf(&result_line, "%s, jitter=%f", result_line, jitter); | ||
851 | asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); | ||
852 | } | ||
853 | if (do_stratum) { | ||
854 | asprintf(&result_line, "%s, stratum=%i", result_line, stratum); | ||
855 | asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_stratum(stratum)); | ||
856 | } | ||
857 | printf("%s|%s\n", result_line, perfdata_line); | ||
858 | |||
859 | if(server_address!=NULL) free(server_address); | ||
860 | return result; | ||
861 | } | ||
862 | |||
863 | |||
864 | |||
865 | void print_help(void){ | ||
866 | print_revision(progname, revision); | ||
867 | |||
868 | printf ("Copyright (c) 2006 Sean Finney\n"); | ||
869 | printf (COPYRIGHT, copyright, email); | ||
870 | |||
871 | printf ("%s\n", _("This plugin checks the selected ntp server")); | ||
872 | |||
873 | printf ("\n\n"); | ||
874 | |||
875 | print_usage(); | ||
876 | printf (_(UT_HELP_VRSN)); | ||
877 | printf (_(UT_HOST_PORT), 'p', "123"); | ||
878 | printf (" %s\n", "-w, --warning=THRESHOLD"); | ||
879 | printf (" %s\n", _("Offset to result in warning status (seconds)")); | ||
880 | printf (" %s\n", "-c, --critical=THRESHOLD"); | ||
881 | printf (" %s\n", _("Offset to result in critical status (seconds)")); | ||
882 | printf (" %s\n", "-W, --warning=THRESHOLD"); | ||
883 | printf (" %s\n", _("Warning threshold for stratum")); | ||
884 | printf (" %s\n", "-W, --critical=THRESHOLD"); | ||
885 | printf (" %s\n", _("Critical threshold for stratum")); | ||
886 | printf (" %s\n", "-j, --warning=THRESHOLD"); | ||
887 | printf (" %s\n", _("Warning threshold for jitter")); | ||
888 | printf (" %s\n", "-k, --critical=THRESHOLD"); | ||
889 | printf (" %s\n", _("Critical threshold for jitter")); | ||
890 | printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT); | ||
891 | printf (_(UT_VERBOSE)); | ||
892 | |||
893 | printf("\n"); | ||
894 | printf("%s\n", _("Notes:")); | ||
895 | printf(" %s\n", _("See:")); | ||
896 | printf(" %s\n", ("http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT")); | ||
897 | printf(" %s\n", _("for THRESHOLD format and examples.")); | ||
898 | |||
899 | printf("\n"); | ||
900 | printf("%s\n", _("Examples:")); | ||
901 | printf(" %s\n", _("Normal offset check:")); | ||
902 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1")); | ||
903 | printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); | ||
904 | printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); | ||
905 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); | ||
906 | printf(" %s\n", _("Check only stratum:")); | ||
907 | printf(" %s\n", ("./check_ntp -H ntpserv -W 4 -C 6")); | ||
908 | |||
909 | printf (_(UT_SUPPORT)); | ||
910 | } | ||
911 | |||
912 | void | ||
913 | print_usage(void) | ||
914 | { | ||
915 | printf (_("Usage:")); | ||
916 | printf(" %s -H <host> [-w <warn>] [-c <crit>] [-W <warn>] [-C <crit>]\n", progname); | ||
917 | printf(" [-j <warn>] [-k <crit>] [-v verbose]\n"); | ||
918 | } | ||
diff --git a/plugins/check_time_ntp.c b/plugins/check_time_ntp.c new file mode 100644 index 00000000..df6e02f8 --- /dev/null +++ b/plugins/check_time_ntp.c | |||
@@ -0,0 +1,918 @@ | |||
1 | /****************************************************************************** | ||
2 | * | ||
3 | * Nagios check_ntp plugin | ||
4 | * | ||
5 | * License: GPL | ||
6 | * Copyright (c) 2006 sean finney <seanius@seanius.net> | ||
7 | * Copyright (c) 2007 nagios-plugins team | ||
8 | * | ||
9 | * Last Modified: $Date$ | ||
10 | * | ||
11 | * Description: | ||
12 | * | ||
13 | * This file contains the check_ntp plugin | ||
14 | * | ||
15 | * This plugin checks NTP servers independently of any command-line | ||
16 | * programs or external libraries. | ||
17 | * | ||
18 | * | ||
19 | * License Information: | ||
20 | * | ||
21 | * This program is free software; you can redistribute it and/or modify | ||
22 | * it under the terms of the GNU General Public License as published by | ||
23 | * the Free Software Foundation; either version 2 of the License, or | ||
24 | * (at your option) any later version. | ||
25 | * | ||
26 | * This program is distributed in the hope that it will be useful, | ||
27 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
28 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
29 | * GNU General Public License for more details. | ||
30 | * | ||
31 | * You should have received a copy of the GNU General Public License | ||
32 | * along with this program; if not, write to the Free Software | ||
33 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
34 | |||
35 | $Id$ | ||
36 | |||
37 | *****************************************************************************/ | ||
38 | |||
/* identification strings used by the help/revision output */
const char *progname = "check_ntp";
const char *revision = "$Revision$";
const char *copyright = "2007";
const char *email = "nagiosplug-devel@lists.sourceforge.net";

#include "common.h"
#include "netutils.h"
#include "utils.h"

/* server to query (-H); NULL until the command line has been parsed */
static char *server_address=NULL;
/* verbosity level, incremented once per -v; DBG() output needs > 1 */
static int verbose=0;
/* offset check: requested flag (-w/-c) and warning/critical threshold strings */
static short do_offset=0;
static char *owarn="60";
static char *ocrit="120";
/* stratum check: requested flag (-W/-C) and threshold strings */
static short do_stratum=0;
static char *swarn="16";
static char *scrit="16";
/* jitter check: requested flag (-j/-k) and threshold strings */
static short do_jitter=0;
static char *jwarn="5000";
static char *jcrit="10000";

int process_arguments (int, char **);
/* parsed forms of the threshold strings above, filled in by main() */
thresholds *offset_thresholds = NULL;
thresholds *jitter_thresholds = NULL;
thresholds *stratum_thresholds = NULL;
void print_help (void);
void print_usage (void);

/* number of times to perform each request to get a good average. */
#define AVG_NUM 4

/* max size of control message data */
#define MAX_CM_SIZE 468
72 | |||
/* this structure holds everything in an ntp request/response as per rfc1305.
 * it is written to and read from the socket as raw bytes, so the multi-byte
 * fields hold network-byte-order values; use the L16/R16/L32/R32 and
 * NTP32asDOUBLE/NTP64asDOUBLE macros to access them */
typedef struct {
	uint8_t flags;       /* byte with leapindicator,vers,mode. see macros */
	uint8_t stratum;     /* clock stratum */
	int8_t poll;         /* polling interval (printed as 2^poll) */
	int8_t precision;    /* precision of the local clock (printed as 2^precision) */
	int32_t rtdelay;     /* total rt delay, as a fixed point num. see macros */
	uint32_t rtdisp;     /* like above, but for max err to primary src */
	uint32_t refid;      /* ref clock identifier */
	uint64_t refts;      /* reference timestamp.  local time local clock */
	uint64_t origts;     /* time at which request departed client */
	uint64_t rxts;       /* time at which request arrived at server */
	uint64_t txts;       /* time at which request departed server */
} ntp_message;
87 | |||
/* this structure holds data about results from querying offset from a peer.
 * one instance exists per resolved address of the host being checked */
typedef struct {
	time_t waiting;         /* ts set when we started waiting for a response;
	                         * reset to 0 once a response has been read */
	int num_responses;      /* number of successfully received responses */
	uint8_t stratum;        /* copied verbatim from the ntp_message */
	double rtdelay;         /* converted from the ntp_message */
	double rtdisp;          /* converted from the ntp_message */
	double offset[AVG_NUM]; /* offsets from each response */
	uint8_t flags;          /* byte with leapindicator,vers,mode. see macros */
} ntp_server_results;
98 | |||
/* this structure holds everything in an ntp control message as per rfc1305.
 * the 16-bit fields are kept in network byte order (they are written with
 * htons() and read with ntohs() throughout this file) */
typedef struct {
	uint8_t flags;       /* byte with leapindicator,vers,mode. see macros */
	uint8_t op;          /* R,E,M bits and Opcode */
	uint16_t seq;        /* Packet sequence */
	uint16_t status;     /* Clock status */
	uint16_t assoc;      /* Association */
	uint16_t offset;     /* Similar to TCP sequence # */
	uint16_t count;      /* # bytes of data */
	char data[MAX_CM_SIZE]; /* ASCII data of the request */
	                        /* NB: not necessarily NULL terminated! */
} ntp_control_message;
111 | |||
/* this is an association/status-word pair found in control packet responses.
 * the data section of a READSTAT response is read as an array of these;
 * both fields are in network byte order (see PEER_SEL) */
typedef struct {
	uint16_t assoc;   /* association (peer) identifier */
	uint16_t status;  /* peer status word */
} ntp_assoc_status_pair;
117 | |||
/* Accessors for the packed header bytes of NTP packets.
 * Every macro argument is parenthesized so that expressions such as
 * MODE(a|b) or OP_SET(x, a|b) are masked as a whole. The *_SET macros
 * only OR bits in; they expect the target field to start out as 0. */

/* bits 1,2 are the leap indicator */
#define LI_MASK 0xc0
#define LI(x) (((x)&LI_MASK)>>6)
#define LI_SET(x,y) do{ (x) |= (((y)<<6)&LI_MASK); }while(0)
/* and these are the values of the leap indicator */
#define LI_NOWARNING 0x00
#define LI_EXTRASEC 0x01
#define LI_MISSINGSEC 0x02
#define LI_ALARM 0x03
/* bits 3,4,5 are the ntp version */
#define VN_MASK 0x38
#define VN(x) (((x)&VN_MASK)>>3)
#define VN_SET(x,y) do{ (x) |= (((y)<<3)&VN_MASK); }while(0)
#define VN_RESERVED 0x02
/* bits 6,7,8 are the ntp mode */
#define MODE_MASK 0x07
#define MODE(x) ((x)&MODE_MASK)
#define MODE_SET(x,y) do{ (x) |= ((y)&MODE_MASK); }while(0)
/* here are some values */
#define MODE_CLIENT 0x03
#define MODE_CONTROLMSG 0x06
/* In control message, bits 8-10 are R,E,M bits */
#define REM_MASK 0xe0
#define REM_RESP 0x80
#define REM_ERROR 0x40
#define REM_MORE 0x20
/* In control message, bits 11 - 15 are opcode */
#define OP_MASK 0x1f
#define OP_SET(x,y) do{ (x) |= ((y)&OP_MASK); }while(0)
#define OP_READSTAT 0x01
#define OP_READVAR 0x02
/* In peer status bytes, bits 6,7,8 determine clock selection status.
 * The argument is a status word in network byte order. */
#define PEER_SEL(x) ((ntohs(x)>>8)&0x07)
#define PEER_INCLUDED 0x04
#define PEER_SYNCSOURCE 0x06
153 | |||
154 | /** | ||
155 | ** a note about the 32-bit "fixed point" numbers: | ||
156 | ** | ||
157 | they are divided into halves, each being a 16-bit int in network byte order: | ||
158 | - the first 16 bits are an int on the left side of a decimal point. | ||
159 | - the second 16 bits represent a fraction n/(2^16) | ||
160 | likewise for the 64-bit "fixed point" numbers with everything doubled :) | ||
161 | **/ | ||
162 | |||
/* macros to access the left/right 16 bits of a 32-bit ntp "fixed point"
   number.  note that these can be used as lvalues too.
   the argument must be an lvalue; it is parenthesized so any lvalue
   expression (p->field, *ptr, arr[i]) can be passed */
#define L16(x) (((uint16_t*)&(x))[0])
#define R16(x) (((uint16_t*)&(x))[1])
/* macros to access the left/right 32 bits of a 64-bit ntp "fixed point"
   number.  these too can be used as lvalues */
#define L32(x) (((uint32_t*)&(x))[0])
#define R32(x) (((uint32_t*)&(x))[1])

/* ntp wants seconds since 1/1/1900, epoch is 1/1/1970.  this is the
   difference in seconds (2208988800) */
#define EPOCHDIFF 0x83aa7e80UL

/* extract a 32-bit ntp fixed point number into a double */
#define NTP32asDOUBLE(x) (ntohs(L16(x)) + (double)ntohs(R16(x))/65536.0)

/* likewise for a 64-bit ntp fp number; the result is in seconds since the
   unix epoch.  an all-zero timestamp converts to 0 */
#define NTP64asDOUBLE(n) ((double)(((uint64_t)(n))?\
                         (ntohl(L32(n))-EPOCHDIFF) + \
                         (.00000001*(0.5+(double)(ntohl(R32(n))/42.94967296))):\
                         0))

/* convert a struct timeval to a double */
#define TVasDOUBLE(x) ((double)((x).tv_sec+(0.000001*(x).tv_usec)))

/* convert an ntp 64-bit fp number to a struct timeval */
#define NTP64toTV(n,t) \
	do{ if(!(n)) (t).tv_sec = (t).tv_usec = 0; \
	    else { \
	        (t).tv_sec=ntohl(L32(n))-EPOCHDIFF; \
	        (t).tv_usec=(int)(0.5+(double)(ntohl(R32(n))/4294.967296)); \
	    } \
	}while(0)

/* convert a struct timeval to an ntp 64-bit fp number */
#define TVtoNTP64(t,n) \
	do{ if(!(t).tv_usec && !(t).tv_sec) (n)=0x0UL; \
	    else { \
	        L32(n)=htonl((t).tv_sec + EPOCHDIFF); \
	        R32(n)=htonl((uint64_t)((4294.967296*(t).tv_usec)+.5)); \
	    } \
	} while(0)
204 | |||
/* NTP control message header is 12 bytes, plus any data in the data
 * field, plus null padding to the nearest 32-bit boundary per rfc.
 * m.count is in network byte order. A count that already is a multiple
 * of 4 gets no padding, so a full MAX_CM_SIZE (468) payload yields 480,
 * exactly sizeof(ntp_control_message), and never more.
 */
#define SIZEOF_NTPCM(m) (12+ntohs((m).count)+((4-(ntohs((m).count)%4))%4))
209 | |||
/* finally, a little helper or two for debugging: */
/* run statement(s) x only at verbosity > 1 (i.e. -v given at least twice).
 * NB: the trailing ';' is part of the expansion and must stay: at least
 * one call site in this file invokes DBG(...) without its own semicolon */
#define DBG(x) do{if(verbose>1){ x; }}while(0);
/* print x as a dotted quad, most significant byte first */
#define PRINTSOCKADDR(x) \
	do{ \
		printf("%u.%u.%u.%u", ((x)>>24)&0xff, ((x)>>16)&0xff, ((x)>>8)&0xff, (x)&0xff);\
	}while(0);
216 | |||
217 | /* calculate the offset of the local clock */ | ||
218 | static inline double calc_offset(const ntp_message *m, const struct timeval *t){ | ||
219 | double client_tx, peer_rx, peer_tx, client_rx; | ||
220 | client_tx = NTP64asDOUBLE(m->origts); | ||
221 | peer_rx = NTP64asDOUBLE(m->rxts); | ||
222 | peer_tx = NTP64asDOUBLE(m->txts); | ||
223 | client_rx=TVasDOUBLE((*t)); | ||
224 | return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); | ||
225 | } | ||
226 | |||
227 | /* print out a ntp packet in human readable/debuggable format */ | ||
228 | void print_ntp_message(const ntp_message *p){ | ||
229 | struct timeval ref, orig, rx, tx; | ||
230 | |||
231 | NTP64toTV(p->refts,ref); | ||
232 | NTP64toTV(p->origts,orig); | ||
233 | NTP64toTV(p->rxts,rx); | ||
234 | NTP64toTV(p->txts,tx); | ||
235 | |||
236 | printf("packet contents:\n"); | ||
237 | printf("\tflags: 0x%.2x\n", p->flags); | ||
238 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
239 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
240 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
241 | printf("\tstratum = %d\n", p->stratum); | ||
242 | printf("\tpoll = %g\n", pow(2, p->poll)); | ||
243 | printf("\tprecision = %g\n", pow(2, p->precision)); | ||
244 | printf("\trtdelay = %-.16g\n", NTP32asDOUBLE(p->rtdelay)); | ||
245 | printf("\trtdisp = %-.16g\n", NTP32asDOUBLE(p->rtdisp)); | ||
246 | printf("\trefid = %x\n", p->refid); | ||
247 | printf("\trefts = %-.16g\n", NTP64asDOUBLE(p->refts)); | ||
248 | printf("\torigts = %-.16g\n", NTP64asDOUBLE(p->origts)); | ||
249 | printf("\trxts = %-.16g\n", NTP64asDOUBLE(p->rxts)); | ||
250 | printf("\ttxts = %-.16g\n", NTP64asDOUBLE(p->txts)); | ||
251 | } | ||
252 | |||
253 | void print_ntp_control_message(const ntp_control_message *p){ | ||
254 | int i=0, numpeers=0; | ||
255 | const ntp_assoc_status_pair *peer=NULL; | ||
256 | |||
257 | printf("control packet contents:\n"); | ||
258 | printf("\tflags: 0x%.2x , 0x%.2x\n", p->flags, p->op); | ||
259 | printf("\t li=%d (0x%.2x)\n", LI(p->flags), p->flags&LI_MASK); | ||
260 | printf("\t vn=%d (0x%.2x)\n", VN(p->flags), p->flags&VN_MASK); | ||
261 | printf("\t mode=%d (0x%.2x)\n", MODE(p->flags), p->flags&MODE_MASK); | ||
262 | printf("\t response=%d (0x%.2x)\n", (p->op&REM_RESP)>0, p->op&REM_RESP); | ||
263 | printf("\t more=%d (0x%.2x)\n", (p->op&REM_MORE)>0, p->op&REM_MORE); | ||
264 | printf("\t error=%d (0x%.2x)\n", (p->op&REM_ERROR)>0, p->op&REM_ERROR); | ||
265 | printf("\t op=%d (0x%.2x)\n", p->op&OP_MASK, p->op&OP_MASK); | ||
266 | printf("\tsequence: %d (0x%.2x)\n", ntohs(p->seq), ntohs(p->seq)); | ||
267 | printf("\tstatus: %d (0x%.2x)\n", ntohs(p->status), ntohs(p->status)); | ||
268 | printf("\tassoc: %d (0x%.2x)\n", ntohs(p->assoc), ntohs(p->assoc)); | ||
269 | printf("\toffset: %d (0x%.2x)\n", ntohs(p->offset), ntohs(p->offset)); | ||
270 | printf("\tcount: %d (0x%.2x)\n", ntohs(p->count), ntohs(p->count)); | ||
271 | numpeers=ntohs(p->count)/(sizeof(ntp_assoc_status_pair)); | ||
272 | if(p->op&REM_RESP && p->op&OP_READSTAT){ | ||
273 | peer=(ntp_assoc_status_pair*)p->data; | ||
274 | for(i=0;i<numpeers;i++){ | ||
275 | printf("\tpeer id %.2x status %.2x", | ||
276 | ntohs(peer[i].assoc), ntohs(peer[i].status)); | ||
277 | if (PEER_SEL(peer[i].status) >= PEER_INCLUDED){ | ||
278 | if(PEER_SEL(peer[i].status) >= PEER_SYNCSOURCE){ | ||
279 | printf(" <-- current sync source"); | ||
280 | } else { | ||
281 | printf(" <-- current sync candidate"); | ||
282 | } | ||
283 | } | ||
284 | printf("\n"); | ||
285 | } | ||
286 | } | ||
287 | } | ||
288 | |||
289 | void setup_request(ntp_message *p){ | ||
290 | struct timeval t; | ||
291 | |||
292 | memset(p, 0, sizeof(ntp_message)); | ||
293 | LI_SET(p->flags, LI_ALARM); | ||
294 | VN_SET(p->flags, 4); | ||
295 | MODE_SET(p->flags, MODE_CLIENT); | ||
296 | p->poll=4; | ||
297 | p->precision=(int8_t)0xfa; | ||
298 | L16(p->rtdelay)=htons(1); | ||
299 | L16(p->rtdisp)=htons(1); | ||
300 | |||
301 | gettimeofday(&t, NULL); | ||
302 | TVtoNTP64(t,p->txts); | ||
303 | } | ||
304 | |||
305 | /* select the "best" server from a list of servers, and return its index. | ||
306 | * this is done by filtering servers based on stratum, dispersion, and | ||
307 | * finally round-trip delay. */ | ||
308 | int best_offset_server(const ntp_server_results *slist, int nservers){ | ||
309 | int i=0, j=0, cserver=0, candidates[5], csize=0; | ||
310 | |||
311 | /* for each server */ | ||
312 | for(cserver=0; cserver<nservers; cserver++){ | ||
313 | /* sort out servers with error flags */ | ||
314 | if ( LI(slist[cserver].flags) != LI_NOWARNING ){ | ||
315 | if (verbose) printf("discarding peer id %d: flags=%d\n", cserver, LI(slist[cserver].flags)); | ||
316 | break; | ||
317 | } | ||
318 | |||
319 | /* compare it to each of the servers already in the candidate list */ | ||
320 | for(i=0; i<csize; i++){ | ||
321 | /* does it have an equal or better stratum? */ | ||
322 | if(slist[cserver].stratum <= slist[i].stratum){ | ||
323 | /* does it have an equal or better dispersion? */ | ||
324 | if(slist[cserver].rtdisp <= slist[i].rtdisp){ | ||
325 | /* does it have a better rtdelay? */ | ||
326 | if(slist[cserver].rtdelay < slist[i].rtdelay){ | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | |||
333 | /* if we haven't reached the current list's end, move everyone | ||
334 | * over one to the right, and insert the new candidate */ | ||
335 | if(i<csize){ | ||
336 | for(j=5; j>i; j--){ | ||
337 | candidates[j]=candidates[j-1]; | ||
338 | } | ||
339 | } | ||
340 | /* regardless, if they should be on the list... */ | ||
341 | if(i<5) { | ||
342 | candidates[i]=cserver; | ||
343 | if(csize<5) csize++; | ||
344 | /* otherwise discard the server */ | ||
345 | } else { | ||
346 | DBG(printf("discarding peer id %d\n", cserver)); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | if(csize>0) { | ||
351 | DBG(printf("best server selected: peer %d\n", candidates[0])); | ||
352 | return candidates[0]; | ||
353 | } else { | ||
354 | DBG(printf("no peers meeting synchronization criteria :(\n")); | ||
355 | return -1; | ||
356 | } | ||
357 | } | ||
358 | |||
/* do everything we need to get the total average offset
 * - we use a certain amount of parallelization with poll() to ensure
 *   we don't waste time sitting around waiting for single packets.
 * - we also "manually" handle resolving host names and connecting, because
 *   we have to do it in a way that our lazy macros don't handle currently :(
 *
 * host    - name or address to resolve; every returned address is queried
 * stratum - out: stratum of the selected server (only written when a
 *           server could be selected)
 * status  - out: set to STATE_UNKNOWN when no server could be selected,
 *           left untouched otherwise
 * returns the average of the offsets collected from the selected server,
 * or 0 when none could be selected. resolver, allocation, socket and
 * poll failures, as well as a complete lack of responses, end the
 * program through die(). */
double offset_request(const char *host, int *stratum, int *status){
	int i=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0, nread=0;
	int servers_completed=0, one_read=0, servers_readable=0, best_index=-1;
	time_t now_time=0, start_ts=0;
	ntp_message *req=NULL;
	double avg_offset=0.;
	struct timeval recv_time;
	struct addrinfo *ai=NULL, *ai_tmp=NULL, hints;
	struct pollfd *ufds=NULL;
	ntp_server_results *servers=NULL;

	/* setup hints to only return results from getaddrinfo that we'd like */
	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = address_family;
	hints.ai_protocol = IPPROTO_UDP;
	hints.ai_socktype = SOCK_DGRAM;

	/* fill in ai with the list of hosts resolved by the host name */
	ga_result = getaddrinfo(host, "123", &hints, &ai);
	if(ga_result!=0){
		die(STATE_UNKNOWN, "error getting address for %s: %s\n",
		    host, gai_strerror(ga_result));
	}

	/* count the number of returned hosts, and allocate stuff accordingly */
	for(ai_tmp=ai; ai_tmp!=NULL; ai_tmp=ai_tmp->ai_next){ num_hosts++; }
	req=(ntp_message*)malloc(sizeof(ntp_message)*num_hosts);
	if(req==NULL) die(STATE_UNKNOWN, "can not allocate ntp message array");
	socklist=(int*)malloc(sizeof(int)*num_hosts);
	if(socklist==NULL) die(STATE_UNKNOWN, "can not allocate socket array");
	ufds=(struct pollfd*)malloc(sizeof(struct pollfd)*num_hosts);
	if(ufds==NULL) die(STATE_UNKNOWN, "can not allocate pollfd array");
	servers=(ntp_server_results*)malloc(sizeof(ntp_server_results)*num_hosts);
	if(servers==NULL) die(STATE_UNKNOWN, "can not allocate server array");
	memset(servers, 0, sizeof(ntp_server_results)*num_hosts);

	/* setup each socket for writing, and the corresponding struct pollfd */
	ai_tmp=ai;
	for(i=0;ai_tmp;i++){
		socklist[i]=socket(ai_tmp->ai_family, SOCK_DGRAM, IPPROTO_UDP);
		if(socklist[i] == -1) {
			perror(NULL);
			die(STATE_UNKNOWN, "can not create new socket");
		}
		if(connect(socklist[i], ai_tmp->ai_addr, ai_tmp->ai_addrlen)){
			die(STATE_UNKNOWN, "can't create socket connection");
		} else {
			ufds[i].fd=socklist[i];
			ufds[i].events=POLLIN;
			ufds[i].revents=0;
		}
		ai_tmp = ai_tmp->ai_next;
	}

	/* now do AVG_NUM checks to each host.  we stop before timeout/2 seconds
	 * have passed in order to ensure post-processing and jitter time. */
	now_time=start_ts=time(NULL);
	while(servers_completed<num_hosts && now_time-start_ts <= socket_timeout/2){
		/* loop through each server and find the first one which hasn't
		 * been touched in the past second or so and is still lacking
		 * some responses.  send it a new request and update its
		 * "waiting" timestamp with the current time. at most one
		 * request goes out per pass through this loop. */
		now_time=time(NULL);

		for(i=0; i<num_hosts; i++){
			if(servers[i].waiting<now_time && servers[i].num_responses<AVG_NUM){
				if(verbose && servers[i].waiting != 0) printf("re-");
				if(verbose) printf("sending request to peer %d\n", i);
				setup_request(&req[i]);
				write(socklist[i], &req[i], sizeof(ntp_message));
				servers[i].waiting=now_time;
				break;
			}
		}

		/* quickly poll for any sockets with pending data */
		servers_readable=poll(ufds, num_hosts, 100);
		if(servers_readable==-1){
			perror("polling ntp sockets");
			die(STATE_UNKNOWN, "communication errors");
		}

		/* read from any sockets with pending data */
		for(i=0; servers_readable && i<num_hosts; i++){
			if(ufds[i].revents&POLLIN && servers[i].num_responses < AVG_NUM){
				if(verbose) {
					printf("response from peer %d: ", i);
				}

				nread=(int)read(ufds[i].fd, &req[i], sizeof(ntp_message));
				gettimeofday(&recv_time, NULL);
				servers_readable--;
				/* a failed or truncated read leaves no complete packet in
				 * req[i]; don't count it as a response. the server keeps
				 * its "waiting" stamp and is asked again later */
				if(nread < (int)sizeof(ntp_message)){
					if(verbose) printf("unusable (read returned %d)\n", nread);
					continue;
				}
				DBG(print_ntp_message(&req[i]));
				respnum=servers[i].num_responses++;
				servers[i].offset[respnum]=calc_offset(&req[i], &recv_time);
				if(verbose) {
					printf("offset %.10g, stratum %i\n", servers[i].offset[respnum], req[i].stratum);
				}
				servers[i].stratum=req[i].stratum;
				servers[i].rtdisp=NTP32asDOUBLE(req[i].rtdisp);
				servers[i].rtdelay=NTP32asDOUBLE(req[i].rtdelay);
				servers[i].waiting=0;
				servers[i].flags=req[i].flags;
				one_read = 1;
				if(servers[i].num_responses==AVG_NUM) servers_completed++;
			}
		}
		/* lather, rinse, repeat. */
	}

	if (one_read == 0) {
		die(STATE_CRITICAL, "NTP CRITICAL: No response from NTP server\n");
	}

	/* now, pick the best server from the list */
	best_index=best_offset_server(servers, num_hosts);
	if(best_index < 0 || servers[best_index].num_responses == 0){
		/* nothing usable (the second test also avoids dividing by zero) */
		*status=STATE_UNKNOWN;
	} else {
		/* finally, calculate the average offset over every response
		 * collected from that server */
		for(i=0; i<servers[best_index].num_responses;i++){
			avg_offset+=servers[best_index].offset[i];
		}
		avg_offset/=servers[best_index].num_responses;
		*stratum = servers[best_index].stratum;
	}

	/* cleanup */
	/* FIXME: Not closing the socket to avoid re-use of the local port
	 * which can cause old NTP packets to be read instead of NTP control
	 * packets in jitter_request(). THERE MUST BE ANOTHER WAY...
	 * for(j=0; j<num_hosts; j++){ close(socklist[j]); } */
	free(socklist);
	free(ufds);
	free(servers);
	free(req);
	freeaddrinfo(ai);

	if(verbose) printf("overall average offset: %.10g\n", avg_offset);
	return avg_offset;
}
507 | |||
508 | void | ||
509 | setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ | ||
510 | memset(p, 0, sizeof(ntp_control_message)); | ||
511 | LI_SET(p->flags, LI_NOWARNING); | ||
512 | VN_SET(p->flags, VN_RESERVED); | ||
513 | MODE_SET(p->flags, MODE_CONTROLMSG); | ||
514 | OP_SET(p->op, opcode); | ||
515 | p->seq = htons(seq); | ||
516 | /* Remaining fields are zero for requests */ | ||
517 | } | ||
518 | |||
519 | /* XXX handle responses with the error bit set */ | ||
520 | double jitter_request(const char *host, int *status){ | ||
521 | int conn=-1, i, npeers=0, num_candidates=0, syncsource_found=0; | ||
522 | int run=0, min_peer_sel=PEER_INCLUDED, num_selected=0, num_valid=0; | ||
523 | int peers_size=0, peer_offset=0; | ||
524 | ntp_assoc_status_pair *peers=NULL; | ||
525 | ntp_control_message req; | ||
526 | const char *getvar = "jitter"; | ||
527 | double rval = 0.0, jitter = -1.0; | ||
528 | char *startofvalue=NULL, *nptr=NULL; | ||
529 | void *tmp; | ||
530 | |||
531 | /* Long-winded explanation: | ||
532 | * Getting the jitter requires a number of steps: | ||
533 | * 1) Send a READSTAT request. | ||
534 | * 2) Interpret the READSTAT reply | ||
535 | * a) The data section contains a list of peer identifiers (16 bits) | ||
536 | * and associated status words (16 bits) | ||
537 | * b) We want the value of 0x06 in the SEL (peer selection) value, | ||
538 | * which means "current synchronizatin source". If that's missing, | ||
539 | * we take anything better than 0x04 (see the rfc for details) but | ||
540 | * set a minimum of warning. | ||
541 | * 3) Send a READVAR request for information on each peer identified | ||
542 | * in 2b greater than the minimum selection value. | ||
543 | * 4) Extract the jitter value from the data[] (it's ASCII) | ||
544 | */ | ||
545 | my_udp_connect(server_address, 123, &conn); | ||
546 | |||
547 | /* keep sending requests until the server stops setting the | ||
548 | * REM_MORE bit, though usually this is only 1 packet. */ | ||
549 | do{ | ||
550 | setup_control_request(&req, OP_READSTAT, 1); | ||
551 | DBG(printf("sending READSTAT request")); | ||
552 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
553 | DBG(print_ntp_control_message(&req)); | ||
554 | /* Attempt to read the largest size packet possible */ | ||
555 | req.count=htons(MAX_CM_SIZE); | ||
556 | DBG(printf("recieving READSTAT response")) | ||
557 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
558 | DBG(print_ntp_control_message(&req)); | ||
559 | /* Each peer identifier is 4 bytes in the data section, which | ||
560 | * we represent as a ntp_assoc_status_pair datatype. | ||
561 | */ | ||
562 | peers_size+=ntohs(req.count); | ||
563 | if((tmp=realloc(peers, peers_size)) == NULL) | ||
564 | free(peers), die(STATE_UNKNOWN, "can not (re)allocate 'peers' buffer\n"); | ||
565 | peers=tmp; | ||
566 | memcpy((void*)((ptrdiff_t)peers+peer_offset), (void*)req.data, ntohs(req.count)); | ||
567 | npeers=peers_size/sizeof(ntp_assoc_status_pair); | ||
568 | peer_offset+=ntohs(req.count); | ||
569 | } while(req.op&REM_MORE); | ||
570 | |||
571 | /* first, let's find out if we have a sync source, or if there are | ||
572 | * at least some candidates. in the case of the latter we'll issue | ||
573 | * a warning but go ahead with the check on them. */ | ||
574 | for (i = 0; i < npeers; i++){ | ||
575 | if (PEER_SEL(peers[i].status) >= PEER_INCLUDED){ | ||
576 | num_candidates++; | ||
577 | if(PEER_SEL(peers[i].status) >= PEER_SYNCSOURCE){ | ||
578 | syncsource_found=1; | ||
579 | min_peer_sel=PEER_SYNCSOURCE; | ||
580 | } | ||
581 | } | ||
582 | } | ||
583 | if(verbose) printf("%d candiate peers available\n", num_candidates); | ||
584 | if(verbose && syncsource_found) printf("synchronization source found\n"); | ||
585 | if(! syncsource_found){ | ||
586 | *status = STATE_UNKNOWN; | ||
587 | if(verbose) printf("warning: no synchronization source found\n"); | ||
588 | } | ||
589 | |||
590 | |||
591 | for (run=0; run<AVG_NUM; run++){ | ||
592 | if(verbose) printf("jitter run %d of %d\n", run+1, AVG_NUM); | ||
593 | for (i = 0; i < npeers; i++){ | ||
594 | /* Only query this server if it is the current sync source */ | ||
595 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ | ||
596 | num_selected++; | ||
597 | setup_control_request(&req, OP_READVAR, 2); | ||
598 | req.assoc = peers[i].assoc; | ||
599 | /* By spec, putting the variable name "jitter" in the request | ||
600 | * should cause the server to provide _only_ the jitter value. | ||
601 | * thus reducing net traffic, guaranteeing us only a single | ||
602 | * datagram in reply, and making intepretation much simpler | ||
603 | */ | ||
604 | /* Older servers doesn't know what jitter is, so if we get an | ||
605 | * error on the first pass we redo it with "dispersion" */ | ||
606 | strncpy(req.data, getvar, MAX_CM_SIZE-1); | ||
607 | req.count = htons(strlen(getvar)); | ||
608 | DBG(printf("sending READVAR request...\n")); | ||
609 | write(conn, &req, SIZEOF_NTPCM(req)); | ||
610 | DBG(print_ntp_control_message(&req)); | ||
611 | |||
612 | req.count = htons(MAX_CM_SIZE); | ||
613 | DBG(printf("recieving READVAR response...\n")); | ||
614 | read(conn, &req, SIZEOF_NTPCM(req)); | ||
615 | DBG(print_ntp_control_message(&req)); | ||
616 | |||
617 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { | ||
618 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | ||
619 | getvar = "dispersion"; | ||
620 | num_selected--; | ||
621 | i--; | ||
622 | continue; | ||
623 | } | ||
624 | |||
625 | /* get to the float value */ | ||
626 | if(verbose) { | ||
627 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); | ||
628 | } | ||
629 | startofvalue = strchr(req.data, '='); | ||
630 | if(startofvalue != NULL) { | ||
631 | startofvalue++; | ||
632 | jitter = strtod(startofvalue, &nptr); | ||
633 | } | ||
634 | if(startofvalue == NULL || startofvalue==nptr){ | ||
635 | printf("warning: unable to read server jitter response.\n"); | ||
636 | *status = STATE_UNKNOWN; | ||
637 | } else { | ||
638 | if(verbose) printf("%g\n", jitter); | ||
639 | num_valid++; | ||
640 | rval += jitter; | ||
641 | } | ||
642 | } | ||
643 | } | ||
644 | if(verbose){ | ||
645 | printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); | ||
646 | } | ||
647 | } | ||
648 | |||
649 | rval = num_valid ? rval / num_valid : -1.0; | ||
650 | |||
651 | close(conn); | ||
652 | if(peers!=NULL) free(peers); | ||
653 | /* If we return -1.0, it means no synchronization source was found */ | ||
654 | return rval; | ||
655 | } | ||
656 | |||
657 | int process_arguments(int argc, char **argv){ | ||
658 | int c; | ||
659 | int option=0; | ||
660 | static struct option longopts[] = { | ||
661 | {"version", no_argument, 0, 'V'}, | ||
662 | {"help", no_argument, 0, 'h'}, | ||
663 | {"verbose", no_argument, 0, 'v'}, | ||
664 | {"use-ipv4", no_argument, 0, '4'}, | ||
665 | {"use-ipv6", no_argument, 0, '6'}, | ||
666 | {"warning", required_argument, 0, 'w'}, | ||
667 | {"critical", required_argument, 0, 'c'}, | ||
668 | {"swarn", required_argument, 0, 'W'}, | ||
669 | {"scrit", required_argument, 0, 'C'}, | ||
670 | {"jwarn", required_argument, 0, 'j'}, | ||
671 | {"jcrit", required_argument, 0, 'k'}, | ||
672 | {"timeout", required_argument, 0, 't'}, | ||
673 | {"hostname", required_argument, 0, 'H'}, | ||
674 | {0, 0, 0, 0} | ||
675 | }; | ||
676 | |||
677 | |||
678 | if (argc < 2) | ||
679 | usage ("\n"); | ||
680 | |||
681 | while (1) { | ||
682 | c = getopt_long (argc, argv, "Vhv46w:c:W:C:j:k:t:H:", longopts, &option); | ||
683 | if (c == -1 || c == EOF || c == 1) | ||
684 | break; | ||
685 | |||
686 | switch (c) { | ||
687 | case 'h': | ||
688 | print_help(); | ||
689 | exit(STATE_OK); | ||
690 | break; | ||
691 | case 'V': | ||
692 | print_revision(progname, revision); | ||
693 | exit(STATE_OK); | ||
694 | break; | ||
695 | case 'v': | ||
696 | verbose++; | ||
697 | break; | ||
698 | case 'w': | ||
699 | do_offset=1; | ||
700 | owarn = optarg; | ||
701 | break; | ||
702 | case 'c': | ||
703 | do_offset=1; | ||
704 | ocrit = optarg; | ||
705 | break; | ||
706 | case 'W': | ||
707 | do_stratum=1; | ||
708 | swarn = optarg; | ||
709 | break; | ||
710 | case 'C': | ||
711 | do_stratum=1; | ||
712 | scrit = optarg; | ||
713 | break; | ||
714 | case 'j': | ||
715 | do_jitter=1; | ||
716 | jwarn = optarg; | ||
717 | break; | ||
718 | case 'k': | ||
719 | do_jitter=1; | ||
720 | jcrit = optarg; | ||
721 | break; | ||
722 | case 'H': | ||
723 | if(is_host(optarg) == FALSE) | ||
724 | usage2(_("Invalid hostname/address"), optarg); | ||
725 | server_address = strdup(optarg); | ||
726 | break; | ||
727 | case 't': | ||
728 | socket_timeout=atoi(optarg); | ||
729 | break; | ||
730 | case '4': | ||
731 | address_family = AF_INET; | ||
732 | break; | ||
733 | case '6': | ||
734 | #ifdef USE_IPV6 | ||
735 | address_family = AF_INET6; | ||
736 | #else | ||
737 | usage4 (_("IPv6 support not available")); | ||
738 | #endif | ||
739 | break; | ||
740 | case '?': | ||
741 | /* print short usage statement if args not parsable */ | ||
742 | usage5 (); | ||
743 | break; | ||
744 | } | ||
745 | } | ||
746 | |||
747 | if(server_address == NULL){ | ||
748 | usage4(_("Hostname was not supplied")); | ||
749 | } | ||
750 | |||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | char *perfd_offset (double offset) | ||
755 | { | ||
756 | return fperfdata ("offset", offset, "s", | ||
757 | TRUE, offset_thresholds->warning->end, | ||
758 | TRUE, offset_thresholds->critical->end, | ||
759 | FALSE, 0, FALSE, 0); | ||
760 | } | ||
761 | |||
762 | char *perfd_jitter (double jitter) | ||
763 | { | ||
764 | return fperfdata ("jitter", jitter, "s", | ||
765 | do_jitter, jitter_thresholds->warning->end, | ||
766 | do_jitter, jitter_thresholds->critical->end, | ||
767 | TRUE, 0, FALSE, 0); | ||
768 | } | ||
769 | |||
770 | char *perfd_stratum (int stratum) | ||
771 | { | ||
772 | return perfdata ("stratum", stratum, "", | ||
773 | do_stratum, (int)stratum_thresholds->warning->end, | ||
774 | do_stratum, (int)stratum_thresholds->critical->end, | ||
775 | TRUE, 0, TRUE, 16); | ||
776 | } | ||
777 | |||
778 | int main(int argc, char *argv[]){ | ||
779 | int result, offset_result, jitter_result, stratum; | ||
780 | double offset=0, jitter=0; | ||
781 | char *result_line, *perfdata_line; | ||
782 | |||
783 | result = offset_result = jitter_result = STATE_OK; | ||
784 | |||
785 | if (process_arguments (argc, argv) == ERROR) | ||
786 | usage4 (_("Could not parse arguments")); | ||
787 | |||
788 | set_thresholds(&offset_thresholds, owarn, ocrit); | ||
789 | set_thresholds(&jitter_thresholds, jwarn, jcrit); | ||
790 | set_thresholds(&stratum_thresholds, swarn, scrit); | ||
791 | |||
792 | /* initialize alarm signal handling */ | ||
793 | signal (SIGALRM, socket_timeout_alarm_handler); | ||
794 | |||
795 | /* set socket timeout */ | ||
796 | alarm (socket_timeout); | ||
797 | |||
798 | offset = offset_request(server_address, &stratum, &offset_result); | ||
799 | if (do_offset && offset_result == STATE_UNKNOWN) { | ||
800 | result = STATE_CRITICAL; | ||
801 | } else { | ||
802 | result = get_status(fabs(offset), offset_thresholds); | ||
803 | } | ||
804 | result = max_state(result, offset_result); | ||
805 | if(do_stratum) | ||
806 | result = max_state(result, get_status(stratum, stratum_thresholds)); | ||
807 | |||
808 | /* If not told to check the jitter, we don't even send packets. | ||
809 | * jitter is checked using NTP control packets, which not all | ||
810 | * servers recognize. Trying to check the jitter on OpenNTPD | ||
811 | * (for example) will result in an error | ||
812 | */ | ||
813 | if(do_jitter){ | ||
814 | jitter=jitter_request(server_address, &jitter_result); | ||
815 | result = max_state(result, get_status(jitter, jitter_thresholds)); | ||
816 | /* -1 indicates that we couldn't calculate the jitter | ||
817 | * Only overrides STATE_OK from the offset */ | ||
818 | if(jitter == -1.0 && result == STATE_OK) | ||
819 | result = STATE_UNKNOWN; | ||
820 | } | ||
821 | result = max_state(result, jitter_result); | ||
822 | |||
823 | switch (result) { | ||
824 | case STATE_CRITICAL : | ||
825 | asprintf(&result_line, "NTP CRITICAL:"); | ||
826 | break; | ||
827 | case STATE_WARNING : | ||
828 | asprintf(&result_line, "NTP WARNING:"); | ||
829 | break; | ||
830 | case STATE_OK : | ||
831 | asprintf(&result_line, "NTP OK:"); | ||
832 | break; | ||
833 | default : | ||
834 | asprintf(&result_line, "NTP UNKNOWN:"); | ||
835 | break; | ||
836 | } | ||
837 | if(offset_result == STATE_UNKNOWN){ | ||
838 | asprintf(&result_line, "%s %s", result_line, _("Offset unknown")); | ||
839 | asprintf(&perfdata_line, ""); | ||
840 | } else { | ||
841 | #if 0 /* 2007-10-25 This can't happen. Leftovers or uninplemented? */ | ||
842 | if(offset_result==STATE_WARNING){ | ||
843 | asprintf(&result_line, "%s %s", result_line, _("Unable to fully sample sync server")); | ||
844 | } | ||
845 | #endif | ||
846 | asprintf(&result_line, "%s Offset %.10g secs", result_line, offset); | ||
847 | asprintf(&perfdata_line, "%s", perfd_offset(offset)); | ||
848 | } | ||
849 | if (do_jitter) { | ||
850 | asprintf(&result_line, "%s, jitter=%f", result_line, jitter); | ||
851 | asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_jitter(jitter)); | ||
852 | } | ||
853 | if (do_stratum) { | ||
854 | asprintf(&result_line, "%s, stratum=%i", result_line, stratum); | ||
855 | asprintf(&perfdata_line, "%s %s", perfdata_line, perfd_stratum(stratum)); | ||
856 | } | ||
857 | printf("%s|%s\n", result_line, perfdata_line); | ||
858 | |||
859 | if(server_address!=NULL) free(server_address); | ||
860 | return result; | ||
861 | } | ||
862 | |||
863 | |||
864 | |||
865 | void print_help(void){ | ||
866 | print_revision(progname, revision); | ||
867 | |||
868 | printf ("Copyright (c) 2006 Sean Finney\n"); | ||
869 | printf (COPYRIGHT, copyright, email); | ||
870 | |||
871 | printf ("%s\n", _("This plugin checks the selected ntp server")); | ||
872 | |||
873 | printf ("\n\n"); | ||
874 | |||
875 | print_usage(); | ||
876 | printf (_(UT_HELP_VRSN)); | ||
877 | printf (_(UT_HOST_PORT), 'p', "123"); | ||
878 | printf (" %s\n", "-w, --warning=THRESHOLD"); | ||
879 | printf (" %s\n", _("Offset to result in warning status (seconds)")); | ||
880 | printf (" %s\n", "-c, --critical=THRESHOLD"); | ||
881 | printf (" %s\n", _("Offset to result in critical status (seconds)")); | ||
882 | printf (" %s\n", "-W, --warning=THRESHOLD"); | ||
883 | printf (" %s\n", _("Warning threshold for stratum")); | ||
884 | printf (" %s\n", "-W, --critical=THRESHOLD"); | ||
885 | printf (" %s\n", _("Critical threshold for stratum")); | ||
886 | printf (" %s\n", "-j, --warning=THRESHOLD"); | ||
887 | printf (" %s\n", _("Warning threshold for jitter")); | ||
888 | printf (" %s\n", "-k, --critical=THRESHOLD"); | ||
889 | printf (" %s\n", _("Critical threshold for jitter")); | ||
890 | printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT); | ||
891 | printf (_(UT_VERBOSE)); | ||
892 | |||
893 | printf("\n"); | ||
894 | printf("%s\n", _("Notes:")); | ||
895 | printf(" %s\n", _("See:")); | ||
896 | printf(" %s\n", ("http://nagiosplug.sourceforge.net/developer-guidelines.html#THRESHOLDFORMAT")); | ||
897 | printf(" %s\n", _("for THRESHOLD format and examples.")); | ||
898 | |||
899 | printf("\n"); | ||
900 | printf("%s\n", _("Examples:")); | ||
901 | printf(" %s\n", _("Normal offset check:")); | ||
902 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1")); | ||
903 | printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); | ||
904 | printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); | ||
905 | printf(" %s\n", ("./check_ntp -H ntpserv -w 0.5 -c 1 -j -1:100 -k -1:200")); | ||
906 | printf(" %s\n", _("Check only stratum:")); | ||
907 | printf(" %s\n", ("./check_ntp -H ntpserv -W 4 -C 6")); | ||
908 | |||
909 | printf (_(UT_SUPPORT)); | ||
910 | } | ||
911 | |||
912 | void | ||
913 | print_usage(void) | ||
914 | { | ||
915 | printf (_("Usage:")); | ||
916 | printf(" %s -H <host> [-w <warn>] [-c <crit>] [-W <warn>] [-C <crit>]\n", progname); | ||
917 | printf(" [-j <warn>] [-k <crit>] [-v verbose]\n"); | ||
918 | } | ||