#!/usr/bin/perl -w
#
# (c)1999 Ian Cass, Knowledge Matters Ltd.
# Read the GNU copyright stuff for all the legalese
#
# Check NTP time servers plugin. This plugin requires the ntpdate utility to
# be installed on the system, however since it's part of the ntp suite, you 
# should already have it installed.
#
# $Id$
#
# Nothing clever done in this program - its a very simple bare basics hack to
# get the job done.
#
# Things to do...
# check @words[9] for time differences greater than +/- x secs & return a
# warning.
#
# (c) 1999 Mark Jewiss, Knowledge Matters Limited
# 22-9-1999, 12:45
#
# Modified script to accept 2 parameters or set defaults.
# Now issues warning or critical alert is time difference is greater than the 
# time passed.
#
# These changes have not been tested completely due to the unavailability of a
# server with the incorrect time.
#
# (c) 1999 Bo Kersey, VirCIO - Managed Server Solutions <bo@vircio.com>
# 22-10-99, 12:17
#
# Modified the script to give useage if no parameters are input.
#
# Modified the script to check for negative as well as positive 
# time differences.
#
# Modified the script to work with ntpdate 3-5.93e Wed Apr 14 20:23:03 EDT 1999
#
# Modified the script to work with ntpdate's that return adjust or offset...
#
#
# Script modified 2000 June 01 by William Pietri <william@bianca.com>
#
# Modified script to handle weird cases:
#     o NTP server doesn't respond (e.g., has died)
#     o Server has correct time but isn't suitable synchronization
#           source. This happens while starting up and if contact
#           with master has been lost.
#
# Modifed to run under Embedded Perl  (sghosh@users.sf.net)
#   - combined logic some blocks together..
# 
# Added ntpdate check for stratum 16 desynch peer (James Fidell) Feb 03, 2003
#
# ntpdate - offset is in seconds
# changed ntpdc to ntpq - jitter/dispersion is in milliseconds
#
# Patch for for regex for stratum1 refid.

require 5.004;
use POSIX;
use strict;
use Getopt::Long;
use vars qw($opt_V $opt_h $opt_H $opt_t $opt_w $opt_c $opt_O $opt_j $opt_k $verbose $PROGNAME $def_jitter $ipv4 $ipv6);
use lib utils.pm;
use utils qw($TIMEOUT %ERRORS &print_revision &support);

$PROGNAME="check_ntp";

sub print_help ();
sub print_usage ();

$ENV{'PATH'}='';
$ENV{'BASH_ENV'}='';
$ENV{'ENV'}='';

# defaults in sec
my $DEFAULT_OFFSET_WARN =  60;  # 1 minute
my $DEFAULT_OFFSET_CRIT = 120;  # 2 minutes
# default in millisec
my $DEFAULT_JITTER_WARN =   5000; # 5 sec
my $DEFAULT_JITTER_CRIT =  10000; # 10 sec

Getopt::Long::Configure('bundling');
GetOptions
	("V"   => \$opt_V, "version"    => \$opt_V,
	 "h"   => \$opt_h, "help"       => \$opt_h,
	 "v"   => \$verbose, "verbose" 	=> \$verbose,
	 "4"   => \$ipv4, "use-ipv4"	=> \$ipv4,
	 "6"   => \$ipv6, "use-ipv6"	=> \$ipv6,
	 "w=f" => \$opt_w, "warning=f"  => \$opt_w,   # offset|adjust warning if above this number
	 "c=f" => \$opt_c, "critical=f" => \$opt_c,   # offset|adjust critical if above this number
	 "O"   => \$opt_O, "zero-offset" => \$opt_O,  # zero-offset  bad
	 "j=s" => \$opt_j, "jwarn=i"    => \$opt_j,   # jitter warning if above this number
	 "k=s" => \$opt_k, "jcrit=i"    => \$opt_k,   # jitter critical if above this number
	 "t=s" => \$opt_t, "timeout=i"  => \$opt_t,
	 "H=s" => \$opt_H, "hostname=s" => \$opt_H);

if ($opt_V) {
	print_revision($PROGNAME,'$Revision$ ');
	exit $ERRORS{'OK'};
}

if ($opt_h) {
	print_help();
	exit $ERRORS{'OK'};
}

# jitter test params specified
if (defined $opt_j || defined $opt_k ) {
	$def_jitter = 1;
}

$opt_H = shift unless ($opt_H);
my $host = $1 if ($opt_H && $opt_H =~ m/^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[a-zA-Z][-a-zA-Z0-9]+(\.[a-zA-Z][-a-zA-Z0-9]+)*)$/);
unless ($host) {
	print "No target host specified\n";
	print_usage();
	exit $ERRORS{'UNKNOWN'};
}

my ($timeout, $owarn, $ocrit, $jwarn, $jcrit);

$timeout = $TIMEOUT;
($opt_t) && ($opt_t =~ /^([0-9]+)$/) && ($timeout = $1);

$owarn = $DEFAULT_OFFSET_WARN;
($opt_w) && ($opt_w =~ /^([0-9.]+)$/) && ($owarn = $1);

$ocrit = $DEFAULT_OFFSET_CRIT;
($opt_c) && ($opt_c =~ /^([0-9.]+)$/) && ($ocrit = $1);

$jwarn = $DEFAULT_JITTER_WARN;
($opt_j) && ($opt_j =~ /^([0-9]+)$/) && ($jwarn = $1);

$jcrit = $DEFAULT_JITTER_CRIT;
($opt_k) && ($opt_k =~ /^([0-9]+)$/) && ($jcrit = $1);

if ($ocrit < $owarn ) {
	print "Critical offset should be larger than warning offset\n";
	print_usage();
	exit $ERRORS{"UNKNOWN"};
}

if ($def_jitter) {
	if ($opt_k < $opt_j) {
		print "Critical jitter should be larger than warning jitter\n";
		print_usage();
		exit $ERRORS{'UNKNOWN'};
	}
}


my $stratum = -1;
my $ignoreret = 0;
my $answer = undef;
my $offset = undef;
my $jitter = undef;
my $syspeer = undef;
my $candidate = 0;
my @candidates;
my $msg; # first line of output to print if format is invalid

my $state = $ERRORS{'UNKNOWN'};
my $ntpdate_error = $ERRORS{'UNKNOWN'};
my $jitter_error = $ERRORS{'UNKNOWN'};

# some systems don't have a proper ntpq  (migrated from ntpdc)
my $have_ntpq = undef;
if ($utils::PATH_TO_NTPQ && -x $utils::PATH_TO_NTPQ ) {
	$have_ntpq = 1;  
}else{
	$have_ntpq = 0;
}

# Just in case of problems, let's not hang Nagios
$SIG{'ALRM'} = sub {
	print ("ERROR: No response from ntp server (alarm)\n");
	exit $ERRORS{"UNKNOWN"};
};
alarm($timeout);

# Determine protocol to be used for ntpdate and ntpq
my $ntpdate = $utils::PATH_TO_NTPDATE;
my $ntpq    = $utils::PATH_TO_NTPQ;
if ($ipv4) {
        $ntpdate .= " -4";
        $ntpq .= " -4";
}
elsif ($ipv6) {
        $ntpdate .= " -6";
        $ntpq .= " -6";
}
# else don't use any flags

###
###
### First, check ntpdate
###
###

if (!open (NTPDATE, $ntpdate . " -q $host 2>&1 |")) {
	print "Could not open $ntpdate: $!\n";
	exit $ERRORS{"UNKNOWN"};
}

my $out;
while (<NTPDATE>) {
	#print if ($verbose);  # noop
	$msg = $_ unless ($msg);
	$out .= "$_ ";
	
	if (/stratum\s(\d+)/) {
		$stratum = $1;
	}
	
	if (/(offset|adjust)\s+([-.\d]+)/i) {
		$offset = $2;

		# An offset of 0.000000 with an error is probably bogus. Actually,
		# it's probably always bogus, but let's be paranoid here.
		# Has been reported that 0.0000 happens in a production environment
		# on Solaris 8 so this check should be taken out - SF tracker 1150777
		if (defined $opt_O ) {
			if ($offset == 0) { undef $offset;}
		}

		$ntpdate_error = defined ($offset) ? $ERRORS{"OK"} : $ERRORS{"CRITICAL"};
		print "ntperr = $ntpdate_error \n" if $verbose;
	
	}

	if (/no server suitable for synchronization found/) {
		if ($stratum == 16) {
			$ntpdate_error = $ERRORS{"WARNING"};
			$msg = "Desynchronized peer server found";
			$ignoreret=1;
		}
		else {
			$ntpdate_error = $ERRORS{"CRITICAL"};
			$msg = "No suitable peer server found - ";
		}
	}

}
$out =~ s/\n//g;
close (NTPDATE) || 
    die $! ? "$out - Error closing $ntpdate pipe: $!"
           : "$out - Exit status: $? from $ntpdate\n";

# declare an error if we also get a non-zero return code from ntpdate
# unless already set to critical
if ( $? && !$ignoreret ) {
	print "stderr = $? : $! \n" if $verbose;
	$ntpdate_error = $ntpdate_error == $ERRORS{"CRITICAL"} ? $ERRORS{"CRITICAL"} : $ERRORS{"UNKNOWN"}  ;
	print "ntperr = $ntpdate_error : $!\n" if $verbose;
}

###
###
### Then scan xntpq/ntpq if it exists
### and look in the 11th column for jitter 
###
# Field 1: Tally Code ( Space, 'x','.','-','+','#','*','o')
#           Only match for '*' which implies sys.peer 
#           or 'o' which implies pps.peer
#           If both exist, the last one is picked. 
# Field 2: address of the remote peer
# Field 3: Refid of the clock (0.0.0.0 if unknown, WWWV/PPS/GPS/ACTS/USNO/PCS/... if Stratum1)
# Field 4: stratum (0-15)
# Field 5: Type of the peer: local (l), unicast (u), multicast (m) 
#          broadcast (b); not sure about multicast/broadcast
# Field 6: last packet receive (in seconds)
# Field 7: polling interval
# Field 8: reachability resgister (octal) 
# Field 9: delay
# Field 10: offset
# Field 11: dispersion/jitter
# 
# According to bug 773588 Some solaris xntpd implementations seemto match on
# "#" even though the docs say it exceeds maximum distance. Providing patch
# here which will generate a warining.

if ($have_ntpq) {

	if ( open(NTPQ, $ntpq . " -np $host 2>&1 |") ) {
		while (<NTPQ>) {
			print $_ if ($verbose);
			if ( /timed out/ ){
				$have_ntpq = 0 ;
				last ;
			}
			# number of candidates on <host> for sys.peer
			if (/^(\*|\+|\#|o])/) {
				++$candidate;
				push (@candidates, $_);
				print "Candidate count= $candidate\n" if ($verbose);
			}
			
			# match sys.peer or pps.peer
			if (/^(\*|o)(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
				$syspeer = $2;
				$stratum = $4;
				$jitter = $11;
				print "match $_ \n" if $verbose;
				if ($jitter > $jcrit) {
					print "Jitter_crit = $11 :$jcrit\n" if ($verbose);
					$jitter_error = $ERRORS{'CRITICAL'};
				} elsif ($jitter > $jwarn ) {
					print "Jitter_warn = $11 :$jwarn\n" if ($verbose);
					$jitter_error = $ERRORS{'WARNING'};
				} else {
					$jitter_error = $ERRORS{'OK'};
				}
			} else {
				print "No match!\n" if $verbose;
				$jitter = '(not parsed)';
			}
			
		}
		close NTPQ ||
            die $! ? "Error closing $ntpq pipe: $!"
                   : "Exit status: $? from $ntpq\n";

		# if we did not match sys.peer or pps.peer but matched # candidates only
		# generate a warning 
		# based on bug id 773588
		unless (defined $syspeer) {
			if ($#candidates >=0) {
				foreach my $c (@candidates) {
					$c =~ /^(#)([-0-9.\s]+)\s+([-0-9A-Za-z_().]+)\s+([-0-9.]+)\s+([lumb-]+)\s+([-0-9m.]+)\s+([-0-9.]+)\s+([-0-9.]+)\s+([-0-9.]+)\s+([-0-9.]+)\s+([-0-9.]+)/;
					$syspeer = $2;
					$stratum = $4;
					$jitter = $11;
					print "candidate match $c \n" if $verbose;
					if ($jitter > $jcrit) {
						print "Candidate match - Jitter_crit = $11 :$jcrit\n" if ($verbose);
						$jitter_error = $ERRORS{'CRITICAL'};
					}elsif ($jitter > $jwarn ) {
						print "Candidate match - Jitter_warn = $11 :$jwarn \n" if ($verbose);
						$jitter_error = $ERRORS{'WARNING'};
					} else {
						$jitter_error = $ERRORS{'WARNING'};
					}
				}

			}
		}
	}
}


if ($ntpdate_error != $ERRORS{'OK'}) {
	$state = $ntpdate_error;
	if ($ntpdate_error == $ERRORS{'WARNING'} ) {
		$answer = $msg;
	}
	else {
		$answer = $msg . "Server for ntp probably down";
	}

	if (defined($offset) && abs($offset) > $ocrit) {
		$state = $ERRORS{'CRITICAL'};
		$answer = "Server Error and offset $offset sec > +/- $ocrit sec";
	} elsif (defined($offset) && abs($offset) > $owarn) {
		$answer = "Server error and offset $offset sec > +/- $owarn sec";
	} elsif (defined($jitter) && abs($jitter) > $jcrit) {
		$answer = "Server error and jitter $jitter msec > +/- $jcrit msec";
	} elsif (defined($jitter) && abs($jitter) > $jwarn) {
		$answer = "Server error and jitter $jitter msec > +/- $jwarn msec";
	}

} elsif ($have_ntpq && $jitter_error != $ERRORS{'OK'}) {
	$state = $jitter_error;
	$answer = "Jitter $jitter too high";
	if (defined($offset) && abs($offset) > $ocrit) {
		$state = $ERRORS{'CRITICAL'};
		$answer = "Jitter error and offset $offset sec > +/- $ocrit sec";
	} elsif (defined($offset) && abs($offset) > $owarn) {
		$answer = "Jitter error and offset $offset sec > +/- $owarn sec";
	} elsif (defined($jitter) && abs($jitter) > $jcrit) {
		$answer = "Jitter error and jitter $jitter msec > +/- $jcrit msec";
	} elsif (defined($jitter) && abs($jitter) > $jwarn) {
		$answer = "Jitter error and jitter $jitter msec > +/- $jwarn msec";
	}

} elsif( !$have_ntpq ) { # no errors from ntpdate and no ntpq or ntpq timed out
	if (abs($offset) > $ocrit) {
		$state = $ERRORS{'CRITICAL'};
		$answer = "Offset $offset sec > +/- $ocrit sec";
	} elsif (abs($offset) > $owarn) {
		$state = $ERRORS{'WARNING'};
		$answer = "Offset $offset sec > +/- $owarn sec";
	} elsif (( abs($offset) > $owarn) && $def_jitter ) {
		$state = $ERRORS{'WARNING'};
		$answer = "Offset $offset sec > +/- $owarn sec, ntpq timed out";
	} elsif ( $def_jitter ) {
		$state = $ERRORS{'WARNING'};
		$answer = "Offset $offset secs, ntpq timed out";
	} else{
		$state = $ERRORS{'OK'};
		$answer = "Offset $offset secs";
	}



} else { # no errors from ntpdate or ntpq
	if (abs($offset) > $ocrit) {
		$state = $ERRORS{'CRITICAL'};
		$answer = "Offset $offset sec > +/- $ocrit sec, jitter $jitter msec";
	} elsif (abs($jitter) > $jcrit ) {
		$state = $ERRORS{'CRITICAL'};
		$answer = "Jitter $jitter msec> +/- $jcrit msec, offset $offset sec";
	} elsif (abs($offset) > $owarn) {
		$state = $ERRORS{'WARNING'};
		$answer = "Offset $offset sec > +/- $owarn sec, jitter $jitter msec";
	} elsif (abs($jitter) > $jwarn ) {
		$state = $ERRORS{'WARNING'};
		$answer = "Jitter $jitter msec> +/- $jwarn msec, offset $offset sec";

	} else {
		$state = $ERRORS{'OK'};
		$answer = "Offset $offset secs, jitter $jitter msec, peer is stratum $stratum";
	}
	
}

foreach my $key (keys %ERRORS) {
	if ($state==$ERRORS{$key}) {
#		print ("NTP $key: $answer");
		print ("NTP $key: $answer|offset=$offset, jitter=" . $jitter/1000 .	",peer_stratum=$stratum\n");
		last;
	}
}
exit $state;


####
#### subs

sub print_usage () {
	print "Usage: $PROGNAME -H <host> [-46] [-O] [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n";
}

sub print_help () {
	print_revision($PROGNAME,'$Revision$');
	print "Copyright (c) 2003 Bo Kersey/Karl DeBisschop\n";
	print "\n";
	print_usage();
	print "
Checks the local timestamp offset versus <host> with ntpdate
Checks the jitter/dispersion of clock signal between <host> and its sys.peer with ntpq\n
-O (--zero-offset)
     A zero offset on \"ntpdate\" will generate a CRITICAL.\n
-w (--warning)
     Clock offset in seconds at which a warning message will be generated.\n	Defaults to $DEFAULT_OFFSET_WARN.
-c (--critical) 
     Clock offset in seconds at which a critical message will be generated.\n	Defaults to $DEFAULT_OFFSET_CRIT.
-j (--jwarn)
     Clock jitter in milliseconds at which a warning message will be generated.\n	Defaults to $DEFAULT_JITTER_WARN.
-k (--jcrit)
    Clock jitter in milliseconds at which a critical message will be generated.\n	Defaults to $DEFAULT_JITTER_CRIT.
    
    If jitter/dispersion is specified with -j or -k and ntpq times out, then a
    warning is returned.\n
-4 (--use-ipv4)
    Use IPv4 connection
-6 (--use-ipv6)
    Use IPv6 connection
\n";	
support();
}