#!/bin/sh

# Paths used by this script.
NTPDRESTART=/var/run/ntpd-restart     # touched when a restart is triggered
NTPDOK=/var/run/ntpd-check-ok         # marker left by a previous successful check
NTPDPPIDFILE=/var/run/ntpd.pid        # its presence is taken as "ntpd is running"
NTPDCHECKLOCK=/var/lock/ntpd-check    # lock file guarding against concurrent runs

# Becomes non-empty once a restart is found to be necessary.
NEEDRESTART=

# Command prefix used for every log message (tagged, with this shell's PID).
LOGGER="logger -t ntp-check --id=$$"

# Check if ntpd is running: without a pid file there is nothing to check.
if [ ! -e "$NTPDPPIDFILE" ]; then

    $LOGGER "ntpd is not running, quitting"

    exit 0

fi

# Two separate tests joined by && instead of the obsolescent "-a" operator,
# which is ambiguous when $1 happens to look like a test operator.
if [ -e "$NTPDOK" ] && [ "${1:-}" != "--force" ]; then

    # ntpd was already checked as having resolved all its names, no
    # need to check again.
    $LOGGER "previous ntpd check OK and --force not specified, quitting"

    exit 0

fi

# Avoid having two concurrent copies of this script running, as that
# could lead to two copies of ntpd running.

if ! dotlockfile -p -r 0 -l "$NTPDCHECKLOCK"; then

    # Another instance is already running and will take care of this.
    $LOGGER "another instance running, quitting"

    exit 0

fi

# Compare the number of peers ntpd currently reports with the number of
# servers listed in the configuration file. Fewer live peers than configured
# servers means a name lookup failed at startup, so ntpd must be restarted.
if [ -z "$NEEDRESTART" ]; then
    numlivepeers="$(ntpq -n -c peers 2>/dev/null | wc -l)"
    numconfpeers="$(grep -c '^server' /etc/ntp/local.conf 2>/dev/null)"

    # POSIX arithmetic expansion instead of the non-portable "let" builtin.
    # An empty value (e.g. grep printing nothing because the configuration
    # file is missing) is counted as 0.
    numlivepeers=$((${numlivepeers:-0} - 2)) # there are two lines of headers
    numconfpeers=$((${numconfpeers:-0} + 1)) # there is a fixed extra for local clock

    # Note that a server hostname may result in more than one NTP peer
    # (e.g., some pool servers), hence "fewer than" rather than "not equal".
    if [ "$numlivepeers" -lt "$numconfpeers" ]; then

        $LOGGER "found $numlivepeers ntp peers but expected at least $numconfpeers, will restart ntpd"
        NEEDRESTART=yes

    fi
fi

# Stop ntpd through its init script, wait until the process has really
# exited, then start a new instance.
#
# Globals:   LOGGER (read) - command prefix used to emit log messages
# Arguments: none
# Returns:   non-zero, without starting anything, when no ntpd process exists
#            or when the init script "stop" action fails; otherwise the exit
#            status of the init script "start" action.
ntpd_restart() {
    local n stopped

    # NOTE: calling /etc/init.d/ntpd restart may not always work reliably,
    # sometimes ntpd can take long to exit, so poll for it to exit.
    # If the DNS servers do not respond it can take long to resolve all ntp
    # servers during ntp startup (up to 70 seconds per ntp server with three
    # unresponsive DNS servers and mDNS activated) and some versions of ntpd
    # will not process a SIGTERM before finishing startup, so waiting for a
    # just started ntpd server to quit can take up to 350 seconds.

    # Signal 0 delivers nothing; it only probes whether an ntpd process exists.
    pkill -0 -x ntpd || return
    /etc/init.d/ntpd stop || return

    # Poll at most 1800 times, pausing 200000 microseconds between probes.
    n=0
    stopped=
    while [ "$n" -lt 1800 ]; do
        if ! pkill -0 -x ntpd; then
            stopped=1
            break
        fi
        # POSIX arithmetic expansion: "((n++))" is not understood by every
        # /bin/sh, in which case the counter would never advance.
        n=$((n + 1))
        usleep 200000
    done

    if [ -n "$stopped" ]; then
        $LOGGER "ntpd stopped, starting new one"
    else
        $LOGGER "timeout waiting for ntpd to exit, attempting to start a new one anyways"
    fi
    /etc/init.d/ntpd start
}

# Act on the outcome of the peer check, then release the lock.
if [ -z "$NEEDRESTART" ]; then
    # ntpd is all ok: leave the marker that disables further automatic checks.
    touch "$NTPDOK"
    $LOGGER "ntpd checked all OK, disabled further checks"
else
    # Drop the "OK" marker from an earlier run, if there is one.
    if [ -e "$NTPDOK" ]; then
        rm -f "$NTPDOK"
    fi

    # This file is for the watchdog repair binary, which removes it itself.
    touch "$NTPDRESTART"

    if ! ntpd_restart; then
        $LOGGER "failed to restart ntpd"
    fi
fi

dotlockfile -p -u "$NTPDCHECKLOCK"

exit 0
