diff options
author | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2007-11-26 10:23:25 +0000 |
---|---|---|
committer | Thomas Guyot-Sionnest <dermoth@users.sourceforge.net> | 2007-11-26 10:23:25 +0000 |
commit | ef1863eed928df0933148ad1bc613618cd93a0cc (patch) | |
tree | 6f2142d8fb6b2d42958d0ebb60b9e86b40dc4804 /plugins | |
parent | 986d0ecf3612547f480b8ab85b6aefce2ce2a5b7 (diff) | |
download | monitoring-plugins-ef1863eed928df0933148ad1bc613618cd93a0cc.tar.gz |
- fix segfault in extract_value called with no data (should never happen though)
- Will now retry with an empty getvar request if all else fails and parses what it can
- check_ntp used to get the average of all candidates if there's no sync peer. The meaning of an average of clock offsets isn't obvious, so it now gets the offset, jitter and stratum of the best candidate (based on current offset)
- Various fixes
- check conversion of stratum (using strtol instead of atoi)
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/branches/dermoth_ntp_rework@1835 f882894a-f735-0410-b71e-b25c423dba1c
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/check_ntp_peer.c | 130 |
1 files changed, 82 insertions, 48 deletions
diff --git a/plugins/check_ntp_peer.c b/plugins/check_ntp_peer.c index 016a034e..5d21832b 100644 --- a/plugins/check_ntp_peer.c +++ b/plugins/check_ntp_peer.c | |||
@@ -180,6 +180,10 @@ char *extract_value(const char *varlist, const char *name){ | |||
180 | char *tmpvarlist=NULL, *tmpkey=NULL, *value=NULL; | 180 | char *tmpvarlist=NULL, *tmpkey=NULL, *value=NULL; |
181 | int last=0; | 181 | int last=0; |
182 | 182 | ||
183 | /* The following code require a non-empty varlist */ | ||
184 | if(strlen(varlist) == 0) | ||
185 | return NULL; | ||
186 | |||
183 | tmpvarlist = strdup(varlist); | 187 | tmpvarlist = strdup(varlist); |
184 | tmpkey = strtok(tmpvarlist, "="); | 188 | tmpkey = strtok(tmpvarlist, "="); |
185 | 189 | ||
@@ -204,21 +208,32 @@ setup_control_request(ntp_control_message *p, uint8_t opcode, uint16_t seq){ | |||
204 | /* Remaining fields are zero for requests */ | 208 | /* Remaining fields are zero for requests */ |
205 | } | 209 | } |
206 | 210 | ||
207 | /* XXX handle responses with the error bit set */ | 211 | /* This function does all the actual work; roughly here's what it does |
212 | * beside setting the offest, jitter and stratum passed as argument: | ||
213 | * - offset can be negative, so if it cannot get the offset, offset_result | ||
214 | * is set to UNKNOWN, otherwise OK. | ||
215 | * - jitter and stratum are set to -1 if they cannot be retrieved so any | ||
216 | * positive value means a success retrieving the value. | ||
217 | * - status is set to WARNING if there's no sync.peer (otherwise OK) and is | ||
218 | * the return value of the function. | ||
219 | * status is pretty much useless as syncsource_found is a global variable | ||
220 | * used later in main to check is the server was synchronized. It works | ||
221 | * so I left it alone, but it can be repurposed if needed */ | ||
208 | int ntp_request(const char *host, double *offset, int *offset_result, double *jitter, int *stratum){ | 222 | int ntp_request(const char *host, double *offset, int *offset_result, double *jitter, int *stratum){ |
209 | int conn=-1, i, npeers=0, num_candidates=0; | 223 | int conn=-1, i, npeers=0, num_candidates=0; |
224 | double tmp_offset = 0; | ||
210 | int min_peer_sel=PEER_INCLUDED; | 225 | int min_peer_sel=PEER_INCLUDED; |
211 | int peers_size=0, peer_offset=0; | 226 | int peers_size=0, peer_offset=0; |
212 | int status; | 227 | int status; |
213 | ntp_assoc_status_pair *peers=NULL; | 228 | ntp_assoc_status_pair *peers=NULL; |
214 | ntp_control_message req; | 229 | ntp_control_message req; |
215 | const char *getvar = "stratum,offset,jitter"; | 230 | const char *getvar = "stratum,offset,jitter"; |
231 | char *data=""; | ||
216 | char *value=NULL, *nptr=NULL; | 232 | char *value=NULL, *nptr=NULL; |
217 | void *tmp; | 233 | void *tmp; |
218 | 234 | ||
219 | status = STATE_OK; | 235 | status = STATE_OK; |
220 | *offset_result = STATE_UNKNOWN; | 236 | *offset_result = STATE_UNKNOWN; |
221 | *jitter = *stratum = -1; | ||
222 | 237 | ||
223 | /* Long-winded explanation: | 238 | /* Long-winded explanation: |
224 | * Getting the sync peer offset, jitter and stratum requires a number of | 239 | * Getting the sync peer offset, jitter and stratum requires a number of |
@@ -284,87 +299,104 @@ int ntp_request(const char *host, double *offset, int *offset_result, double *ji | |||
284 | 299 | ||
285 | for (i = 0; i < npeers; i++){ | 300 | for (i = 0; i < npeers; i++){ |
286 | /* Only query this server if it is the current sync source */ | 301 | /* Only query this server if it is the current sync source */ |
302 | /* If there's no sync.peer, query all candidates and use the best one */ | ||
287 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ | 303 | if (PEER_SEL(peers[i].status) >= min_peer_sel){ |
288 | if(verbose) printf("Getting offset, jitter and stratum for peer %.2x\n", ntohs(peers[i].assoc)); | 304 | if(verbose) printf("Getting offset, jitter and stratum for peer %.2x\n", ntohs(peers[i].assoc)); |
289 | setup_control_request(&req, OP_READVAR, 2); | 305 | data = "\0"; |
290 | req.assoc = peers[i].assoc; | 306 | do{ |
291 | /* Putting the wanted variable names in the request | 307 | setup_control_request(&req, OP_READVAR, 2); |
292 | * cause the server to provide _only_ the requested values. | 308 | req.assoc = peers[i].assoc; |
293 | * thus reducing net traffic, guaranteeing us only a single | 309 | /* Putting the wanted variable names in the request |
294 | * datagram in reply, and making intepretation much simpler | 310 | * cause the server to provide _only_ the requested values. |
295 | */ | 311 | * thus reducing net traffic, guaranteeing us only a single |
296 | /* Older servers doesn't know what jitter is, so if we get an | 312 | * datagram in reply, and making intepretation much simpler |
297 | * error on the first pass we redo it with "dispersion" */ | 313 | */ |
298 | strncpy(req.data, getvar, MAX_CM_SIZE-1); | 314 | /* Older servers doesn't know what jitter is, so if we get an |
299 | req.count = htons(strlen(getvar)); | 315 | * error on the first pass we redo it with "dispersion" */ |
300 | DBG(printf("sending READVAR request...\n")); | 316 | strncpy(req.data, getvar, MAX_CM_SIZE-1); |
301 | write(conn, &req, SIZEOF_NTPCM(req)); | 317 | req.count = htons(strlen(getvar)); |
302 | DBG(print_ntp_control_message(&req)); | 318 | DBG(printf("sending READVAR request...\n")); |
303 | 319 | write(conn, &req, SIZEOF_NTPCM(req)); | |
304 | req.count = htons(MAX_CM_SIZE); | 320 | DBG(print_ntp_control_message(&req)); |
305 | DBG(printf("recieving READVAR response...\n")); | 321 | |
306 | read(conn, &req, SIZEOF_NTPCM(req)); | 322 | req.count = htons(MAX_CM_SIZE); |
307 | DBG(print_ntp_control_message(&req)); | 323 | DBG(printf("receiving READVAR response...\n")); |
308 | 324 | read(conn, &req, SIZEOF_NTPCM(req)); | |
309 | if(req.op&REM_ERROR && strstr(getvar, "jitter")) { | 325 | DBG(print_ntp_control_message(&req)); |
310 | if(verbose) printf("The 'jitter' command failed (old ntp server?)\nRestarting with 'dispersion'...\n"); | 326 | |
311 | getvar = "stratum,offset,dispersion"; | 327 | if(!(req.op&REM_ERROR)) |
312 | i--; | 328 | asprintf(&data, "%s%s", data, req.data); |
313 | continue; | 329 | } while(req.op&REM_MORE); |
330 | |||
331 | if(req.op&REM_ERROR) { | ||
332 | if(strstr(getvar, "jitter")) { | ||
333 | if(verbose) printf("The command failed. This is usually caused by servers refusing the 'jitter'\nvariable. Restarting with 'dispersion'...\n"); | ||
334 | getvar = "stratum,offset,dispersion"; | ||
335 | i--; | ||
336 | continue; | ||
337 | } else if(strlen(getvar)) { | ||
338 | if(verbose) printf("Server didn't like dispersion either; will retrieve everything\n"); | ||
339 | getvar = ""; | ||
340 | i--; | ||
341 | continue; | ||
342 | } | ||
314 | } | 343 | } |
315 | 344 | ||
316 | if(verbose > 1) | 345 | if(verbose > 1) |
317 | printf("Server responded: >>>%s<<<\n", req.data); | 346 | printf("Server responded: >>>%s<<<\n", data); |
318 | 347 | ||
319 | /* get the offset */ | 348 | /* get the offset */ |
320 | if(verbose) | 349 | if(verbose) |
321 | printf("parsing offset from peer %.2x: ", ntohs(peers[i].assoc)); | 350 | printf("parsing offset from peer %.2x: ", ntohs(peers[i].assoc)); |
322 | 351 | ||
323 | value = extract_value(req.data, "offset"); | 352 | value = extract_value(data, "offset"); |
324 | /* Convert the value if we have one */ | 353 | /* Convert the value if we have one */ |
325 | if(value != NULL) | 354 | if(value != NULL) |
326 | *offset = strtod(value, &nptr) / 1000; | 355 | tmp_offset = strtod(value, &nptr) / 1000; |
327 | /* If value is null or no conversion was performed */ | 356 | /* If value is null or no conversion was performed */ |
328 | if(value == NULL || value==nptr) { | 357 | if(value == NULL || value==nptr) { |
329 | printf("warning: unable to read server offset response.\n"); | 358 | if(verbose) printf("error: unable to read server offset response.\n"); |
330 | status = max_state_alt(status, STATE_CRITICAL); | ||
331 | } else { | 359 | } else { |
332 | *offset_result = STATE_OK; | ||
333 | if(verbose) printf("%g\n", *offset); | 360 | if(verbose) printf("%g\n", *offset); |
361 | if(*offset_result == STATE_UNKNOWN || fabs(tmp_offset) < fabs(*offset)) { | ||
362 | *offset = tmp_offset; | ||
363 | *offset_result = STATE_OK; | ||
364 | } else { | ||
365 | /* Skip this one; move to the next */ | ||
366 | continue; | ||
367 | } | ||
334 | } | 368 | } |
335 | 369 | ||
336 | if(do_jitter) { | 370 | if(do_jitter) { |
337 | /* first reset the pointers */ | ||
338 | value = NULL, nptr=NULL; | ||
339 | /* get the jitter */ | 371 | /* get the jitter */ |
340 | if(verbose) { | 372 | if(verbose) { |
341 | printf("parsing jitter from peer %.2x: ", ntohs(peers[i].assoc)); | 373 | printf("parsing %s from peer %.2x: ", strstr(getvar, "dispersion") != NULL ? "dispersion" : "jitter", ntohs(peers[i].assoc)); |
342 | } | 374 | } |
343 | value = extract_value(req.data, strstr(getvar, "dispersion") != NULL ? "dispersion" : "jitter"); | 375 | value = extract_value(data, strstr(getvar, "dispersion") != NULL ? "dispersion" : "jitter"); |
344 | /* Convert the value if we have one */ | 376 | /* Convert the value if we have one */ |
345 | if(value != NULL) | 377 | if(value != NULL) |
346 | *jitter = strtod(value, &nptr); | 378 | *jitter = strtod(value, &nptr); |
347 | /* If value is null or no conversion was performed */ | 379 | /* If value is null or no conversion was performed */ |
348 | if(value == NULL || value==nptr){ | 380 | if(value == NULL || value==nptr){ |
349 | printf("warning: unable to read server jitter response.\n"); | 381 | if(verbose) printf("error: unable to read server jitter response.\n"); |
350 | status = max_state_alt(status, STATE_UNKNOWN); | 382 | *jitter = -1; |
351 | } else { | 383 | } else { |
352 | if(verbose) printf("%g\n", *jitter); | 384 | if(verbose) printf("%g\n", *jitter); |
353 | } | 385 | } |
354 | } | 386 | } |
355 | 387 | ||
356 | if(do_stratum) { | 388 | if(do_stratum) { |
357 | value = NULL; | ||
358 | /* get the stratum */ | 389 | /* get the stratum */ |
359 | if(verbose) { | 390 | if(verbose) { |
360 | printf("parsing stratum from peer %.2x: ", ntohs(peers[i].assoc)); | 391 | printf("parsing stratum from peer %.2x: ", ntohs(peers[i].assoc)); |
361 | } | 392 | } |
362 | value = extract_value(req.data, "stratum"); | 393 | value = extract_value(data, "stratum"); |
363 | if(value == NULL){ | 394 | if(value != NULL) |
364 | printf("warning: unable to read server stratum response.\n"); | 395 | *stratum = strtol(value, &nptr, 10); |
365 | status = max_state_alt(status, STATE_UNKNOWN); | 396 | if(value == NULL || value==nptr){ |
397 | if(verbose) printf("error: unable to read server stratum response.\n"); | ||
398 | *stratum = -1; | ||
366 | } else { | 399 | } else { |
367 | *stratum = atoi(value); | ||
368 | if(verbose) printf("%i\n", *stratum); | 400 | if(verbose) printf("%i\n", *stratum); |
369 | } | 401 | } |
370 | } | 402 | } |
@@ -503,8 +535,6 @@ int main(int argc, char *argv[]){ | |||
503 | double offset=0, jitter=0; | 535 | double offset=0, jitter=0; |
504 | char *result_line, *perfdata_line; | 536 | char *result_line, *perfdata_line; |
505 | 537 | ||
506 | result = offset_result = STATE_OK; | ||
507 | |||
508 | if (process_arguments (argc, argv) == ERROR) | 538 | if (process_arguments (argc, argv) == ERROR) |
509 | usage4 (_("Could not parse arguments")); | 539 | usage4 (_("Could not parse arguments")); |
510 | 540 | ||
@@ -518,7 +548,11 @@ int main(int argc, char *argv[]){ | |||
518 | /* set socket timeout */ | 548 | /* set socket timeout */ |
519 | alarm (socket_timeout); | 549 | alarm (socket_timeout); |
520 | 550 | ||
551 | /* This returns either OK or WARNING (See comment preceeding ntp_request) */ | ||
521 | result = ntp_request(server_address, &offset, &offset_result, &jitter, &stratum); | 552 | result = ntp_request(server_address, &offset, &offset_result, &jitter, &stratum); |
553 | if(offset_result == STATE_UNKNOWN) | ||
554 | result = STATE_CRITICAL; | ||
555 | |||
522 | result = max_state_alt(result, get_status(fabs(offset), offset_thresholds)); | 556 | result = max_state_alt(result, get_status(fabs(offset), offset_thresholds)); |
523 | 557 | ||
524 | if(do_stratum) | 558 | if(do_stratum) |
@@ -610,7 +644,7 @@ void print_help(void){ | |||
610 | 644 | ||
611 | printf("\n"); | 645 | printf("\n"); |
612 | printf("%s\n", _("Examples:")); | 646 | printf("%s\n", _("Examples:")); |
613 | printf(" %s\n", _("Normal offset check:")); | 647 | printf(" %s\n", _("Normal NTP server check:")); |
614 | printf(" %s\n", ("./check_ntp_peer -H ntpserv -w 0.5 -c 1")); | 648 | printf(" %s\n", ("./check_ntp_peer -H ntpserv -w 0.5 -c 1")); |
615 | printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); | 649 | printf(" %s\n", _("Check jitter too, avoiding critical notifications if jitter isn't available")); |
616 | printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); | 650 | printf(" %s\n", _("(See Notes above for more details on thresholds formats):")); |