usr.sbin/dntpd/client.c

   1 /*
   2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  * $DragonFly: src/usr.sbin/dntpd/client.c,v 1.9 2005/04/26 23:50:23 dillon Exp $
  35  */
  36
  37 #include "defs.h"
  38
  39 void
  40 client_init(void)
  41 {
  42 }
  43
  44 int
  45 client_main(struct server_info **info_ary, int count)
  46 {
  47     struct server_info *best_off;
  48     struct server_info *best_freq;
  49     double last_freq;
  50     double freq;
  51     double offset;
  52     int i;
  53     int calc_offset_correction;
  54
  55     last_freq = 0.0;
  56
  57     for (;;) {
  58         /*
  59          * Subtract the interval from poll_sleep and poll the client
  60          * if it reaches 0.
  61          *
  62          * Because we do not compensate for offset corrections which are
  63          * in progress, we cannot accumulate data for an offset correction
  64          * while a prior correction is still being worked through by the
  65          * system.
  66          */
  67         calc_offset_correction = !sysntp_offset_correction_is_running();
  68         for (i = 0; i < count; ++i)
  69             client_poll(info_ary[i], min_sleep_opt, calc_offset_correction);
  70
  71         /*
  72          * Find the best client (or synthesize one).  A different client
  73          * can be chosen for frequency and offset.  Note in particular
  74          * that offset counters and averaging code gets reset when an
  75          * offset correction is made (otherwise the averaging history will
  76          * cause later corrections to overshoot).
  77          *
  78          * The regression used to calculate the frequency is a much
  79          * longer-term entity and is NOT reset, so it is still possible
  80          * for the offset correction code to make minor adjustments to
  81          * the frequency if it so desires.
  82          *
  83          * client_check may replace the server_info pointer with a new
  84          * one.
  85          */
  86         best_off = NULL;
  87         best_freq = NULL;
  88         for (i = 0; i < count; ++i)
  89             client_check(&info_ary[i], &best_off, &best_freq);
  90
  91         /*
  92          * Offset correction.
  93          */
  94         if (best_off) {
  95             offset = best_off->lin_sumoffset / best_off->lin_countoffset;
  96             lin_resetalloffsets(info_ary, count);
  97             if (offset < -COURSE_OFFSET_CORRECTION_LIMIT ||
  98                 offset > COURSE_OFFSET_CORRECTION_LIMIT ||
  99                 quickset_opt
 100             ) {
 101                 freq = sysntp_correct_course_offset(offset);
 102                 quickset_opt = 0;
 103             } else {
 104                 freq = sysntp_correct_offset(offset);
 105             }
 106         } else {
 107             freq = 0.0;
 108         }
 109
 110         /*
 111          * Frequency correction (throw away minor freq adjusts from the
 112          * offset code if we can't do a frequency correction here).  Do
 113          * not reissue if it hasn't changed from the last issued correction.
 114          */
 115         if (best_freq) {
 116             freq += best_freq->lin_cache_freq;
 117             if (last_freq != freq) {
 118                 sysntp_correct_freq(freq);
 119                 last_freq = freq;
 120             }
 121         }
 122
 123         /*
 124          * This function is responsible for managing the polling mode and
 125          * figures out how long we should sleep.
 126          */
 127         for (i = 0; i < count; ++i)
 128             client_manage_polling_mode(info_ary[i]);
 129
 130         /*
 131          * Polling loop sleep.
 132          */
 133         usleep(min_sleep_opt * 1000000 + random() % 500000);
 134     }
 135 }
 136
 137 void
 138 client_poll(server_info_t info, int poll_interval, int calc_offset_correction)
 139 {
 140     struct timeval rtv;
 141     struct timeval ltv;
 142     struct timeval lbtv;
 143     double offset;
 144
 145     /*
 146      * By default we always poll.  If the polling interval comes under
 147      * active management the poll_sleep will be non-zero.
 148      */
 149     if (info->poll_sleep > poll_interval) {
 150         info->poll_sleep -= poll_interval;
 151         return;
 152     }
 153     info->poll_sleep = 0;
 154
 155     logdebug(4, "%s: poll, ", info->target);
 156     if (udp_ntptimereq(info->fd, &rtv, &ltv, &lbtv) < 0) {
 157         ++info->poll_failed;
 158         logdebug(4, "no response (%d failures in a row)\n",
 159                 info->poll_failed);
 160         if (info->poll_failed == POLL_FAIL_RESET) {
 161             if (info->lin_count != 0) {
 162                 logdebug(4, "%s: resetting regression due to failures\n",
 163                         info->target);
 164             }
 165             lin_reset(info);
 166         }
 167         return;
 168     }
 169
 170     /*
 171      * Successful query.  Update polling info for the polling mode manager.
 172      */
 173     ++info->poll_count;
 174     info->poll_failed = 0;
 175
 176     /*
 177      * Figure out the offset (the difference between the reported
 178      * time and our current time) for linear regression purposes.
 179      */
 180     offset = tv_delta_double(&rtv, &ltv);
 181
 182     while (info) {
 183         /*
 184          * Linear regression
 185          */
 186         if (debug_level >= 4) {
 187             struct tm *tp;
 188             char buf[64];
 189             time_t t;
 190
 191             t = rtv.tv_sec;
 192             tp = localtime(&t);
 193             strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp);
 194             logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000);
 195         }
 196         lin_regress(info, &ltv, &lbtv, offset, calc_offset_correction);
 197         info = info->altinfo;
 198         if (info && debug_level >= 4) {
 199             logdebug(4, "%*.*s: poll, ",
 200                 (int)strlen(info->target),
 201                 (int)strlen(info->target), "(alt)");
 202         }
 203     }
 204 }
 205
 206 /*
 207  * Find the best client (or synthesize a fake info structure to return).
 208  * We can find separate best clients for offset and frequency.
 209  */
 210 void
 211 client_check(struct server_info **checkp,
 212              struct server_info **best_off,
 213              struct server_info **best_freq)
 214 {
 215     struct server_info *check = *checkp;
 216     struct server_info *info;
 217
 218     /*
 219      * Start an alternate linear regression once our current one
 220      * has passed a certain point.
 221      */
 222     if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) {
 223         info = malloc(sizeof(*info));
 224         assert(info != NULL);
 225         /* note: check->altinfo is NULL as of the bcopy */
 226         bcopy(check, info, sizeof(*info));
 227         check->altinfo = info;
 228         lin_reset(info);
 229     }
 230
 231     /*
 232      * Replace our current linear regression with the alternate once
 233      * the current one has hit its limit (beyond a certain point the
 234      * linear regression starts to work against us, preventing us from
 235      * reacting to changing conditions).
 236      *
 237      * Report any significant change in the offset or ppm.
 238      */
 239     if (check->lin_count >= LIN_RESTART) {
 240         if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) {
 241             double freq_diff;
 242
 243             freq_diff = info->lin_cache_freq - check->lin_cache_freq;
 244             logdebug(4, "%s: Switching to alternate, Frequence "
 245                     "difference is %6.3f ppm\n",
 246                     info->target, freq_diff * 1.0E+6);
 247             *checkp = info;
 248             free(check);
 249             check = info;
 250         }
 251     }
 252
 253     /*
 254      * BEST CLIENT FOR FREQUENCY CORRECTION:
 255      *
 256      *  8 samples and a correllation > 0.99, or
 257      * 16 samples and a correllation > 0.96
 258      */
 259     info = *best_freq;
 260     if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) ||
 261         (check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96)
 262     ) {
 263         if (info == NULL ||
 264             fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr)
 265         ) {
 266             info = check;
 267             *best_freq = info;
 268         }
 269
 270     }
 271
 272     /*
 273      * BEST CLIENT FOR OFFSET CORRECTION:
 274      *
 275      * Use the standard-deviation and require at least 4 samples.  An
 276      * offset correction is valid if the standard deviation is less then
 277      * the average offset divided by 4.
 278      */
 279     info = *best_off;
 280     if (check->lin_countoffset >= 4 &&
 281         check->lin_cache_stddev < fabs(check->lin_sumoffset / check->lin_countoffset / 4)) {
 282         if (info == NULL ||
 283             fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev)
 284         ) {
 285             info = check;
 286             *best_off = info;
 287         }
 288     }
 289 }
 290
 291 /*
 292  * Actively manage the polling interval.  Note that the poll_* fields are
 293  * always transfered to the alternate regression when the check code replaces
 294  * the current regression with a new one.
 295  *
 296  * This routine is called from the main loop for each base info structure.
 297  * The polling mode applies to all alternates so we do not have to iterate
 298  * through the alt's.
 299  */
 300 void
 301 client_manage_polling_mode(struct server_info *info)
 302 {
 303     /*
 304      * If too many query failures occured go into a failure-recovery state.
 305      * If we were in startup when we failed, go into the second failure
 306      * state so a recovery returns us back to startup mode.
 307      */
 308     if (info->poll_failed >= POLL_FAIL_RESET &&
 309         info->poll_mode != POLL_FAILED_1 &&
 310         info->poll_mode != POLL_FAILED_2
 311     ) {
 312         logdebug(2, "%s: polling mode moving to a FAILED state.\n",
 313                 info->target);
 314         if (info->poll_mode != POLL_STARTUP)
 315             info->poll_mode = POLL_FAILED_1;
 316         else
 317             info->poll_mode = POLL_FAILED_2;
 318         info->poll_count = 0;
 319     }
 320
 321     /*
 322      * Standard polling mode progression
 323      */
 324     switch(info->poll_mode) {
 325     case POLL_FIXED:
 326         info->poll_mode = POLL_STARTUP;
 327         info->poll_count = 0;
 328         logdebug(2, "%s: polling mode INIT->STARTUP.\n", info->target);
 329         /* fall through */
 330     case POLL_STARTUP:
 331         if (info->poll_count < POLL_STARTUP_MAX) {
 332             if (info->poll_sleep == 0)
 333                 info->poll_sleep = min_sleep_opt;
 334             break;
 335         }
 336         info->poll_mode = POLL_ACQUIRE;
 337         info->poll_count = 0;
 338         logdebug(2, "%s: polling mode STARTUP->ACQUIRE.\n", info->target);
 339         /* fall through */
 340     case POLL_ACQUIRE:
 341         /*
 342          * Acquisition mode using the nominal timeout.  We do not shift
 343          * to maintainance mode unless the correllation is at least 0.90
 344          */
 345         if (info->poll_count < POLL_ACQUIRE_MAX ||
 346             info->lin_count < 8 ||
 347             fabs(info->lin_cache_corr) < 0.85
 348         ) {
 349             if (info->poll_count >= POLL_ACQUIRE_MAX &&
 350                 info->lin_count == LIN_RESTART - 2
 351             ) {
 352                 logdebug(2,
 353                     "%s: WARNING: Unable to shift this source to "
 354                     "maintainance mode.  Target correllation is aweful.\n",
 355                     info->target);
 356             }
 357             if (info->poll_sleep == 0)
 358                 info->poll_sleep = nom_sleep_opt;
 359             break;
 360         }
 361         info->poll_mode = POLL_MAINTAIN;
 362         info->poll_count = 0;
 363         logdebug(2, "%s: polling mode ACQUIRE->MAINTAIN.\n", info->target);
 364         /* fall through */
 365     case POLL_MAINTAIN:
 366         if (info->lin_count >= LIN_RESTART / 2 &&
 367             fabs(info->lin_cache_corr) < 0.70
 368         ) {
 369             logdebug(2,
 370                 "%s: polling mode MAINTAIN->ACQUIRE.  Unable to maintain\n"
 371                 "the maintainance mode because the correllation went"
 372                 " bad!\n", info->target);
 373             info->poll_mode = POLL_ACQUIRE;
 374             info->poll_count = 0;
 375             break;
 376         }
 377         if (info->poll_sleep == 0)
 378             info->poll_sleep = max_sleep_opt;
 379         /* do nothing */
 380         break;
 381     case POLL_FAILED_1:
 382         /*
 383          * We have failed recently. If we recover return to the acquisition
 384          * state.
 385          *
 386          * poll_count does not increment while we are failed.  poll_failed
 387          * does increment (but gets zero'd once we recover).
 388          */
 389         if (info->poll_count != 0) {
 390             logdebug(2, "%s: polling mode FAILED1->ACQUIRE.\n", info->target);
 391             info->poll_mode = POLL_ACQUIRE;
 392             /* do not reset poll_count */
 393             break;
 394         }
 395         if (info->poll_failed >= POLL_RECOVERY_RESTART)
 396             info->poll_mode = POLL_FAILED_2;
 397         if (info->poll_sleep == 0)
 398             info->poll_sleep = nom_sleep_opt;
 399         break;
 400     case POLL_FAILED_2:
 401         /*
 402          * We have been failed for a very long time, or we failed while
 403          * in startup.  If we recover we have to go back into startup.
 404          */
 405         if (info->poll_count != 0) {
 406             logdebug(2, "%s: polling mode FAILED2->STARTUP.\n", info->target);
 407             info->poll_mode = POLL_STARTUP;
 408             break;
 409         }
 410         if (info->poll_sleep == 0)
 411             info->poll_sleep = nom_sleep_opt;
 412         break;
 413     }
 414 }
 415
 416 /*
 417  * Linear regression.
 418  *
 419  *      ltv     local time as of when the offset error was calculated between
 420  *              local time and remote time.
 421  *
 422  *      lbtv    base time as of when local time was obtained.  Used to
 423  *              calculate the cumulative corrections made to the system's
 424  *              real time clock so we can de-correct the offset for the
 425  *              linear regression.
 426  *
 427  * X is the time axis, in seconds.
 428  * Y is the uncorrected offset, in seconds.
 429  */
 430 void
 431 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv,
 432             double offset, int calc_offset_correction)
 433 {
 434     double time_axis;
 435     double uncorrected_offset;
 436
 437     /*
 438      * De-correcting the offset:
 439      *
 440      *  The passed offset is (our_real_time - remote_real_time).  To remove
 441      *  corrections from our_real_time we take the difference in the basetime
 442      *  (new_base_time - old_base_time) and subtract that from the offset.
 443      *  That is, if the basetime goesup, the uncorrected offset goes down.
 444      */
 445     if (info->lin_count == 0) {
 446         info->lin_tv = *ltv;
 447         info->lin_btv = *lbtv;
 448         time_axis = 0;
 449         uncorrected_offset = offset;
 450     } else {
 451         time_axis = tv_delta_double(&info->lin_tv, ltv);
 452         uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv);
 453     }
 454
 455     /*
 456      * We have to use the uncorrected offset for frequency calculations.
 457      */
 458     ++info->lin_count;
 459     info->lin_sumx += time_axis;
 460     info->lin_sumx2 += time_axis * time_axis;
 461     info->lin_sumy += uncorrected_offset;
 462     info->lin_sumy2 += uncorrected_offset * uncorrected_offset;
 463     info->lin_sumxy += time_axis * uncorrected_offset;
 464
 465     /*
 466      * We have to use the corrected offset for offset calculations.
 467      */
 468     if (calc_offset_correction) {
 469         ++info->lin_countoffset;
 470         info->lin_sumoffset += offset;
 471         info->lin_sumoffset2 += offset * offset;
 472     }
 473
 474     /*
 475      * Calculate various derived values.   This gets us slope, y-intercept,
 476      * and correllation from the linear regression.
 477      */
 478     if (info->lin_count > 1) {
 479         info->lin_cache_slope =
 480          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 481          (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx);
 482
 483         info->lin_cache_yint =
 484          (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) /
 485          (info->lin_count);
 486
 487         info->lin_cache_corr =
 488          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 489          sqrt((info->lin_count * info->lin_sumx2 -
 490                       info->lin_sumx * info->lin_sumx) *
 491              (info->lin_count * info->lin_sumy2 -
 492                       info->lin_sumy * info->lin_sumy)
 493          );
 494     }
 495
 496     /*
 497      * Calculate more derived values.  This gets us the standard-deviation
 498      * of offsets.  The standard deviation approximately means that 68%
 499      * of the samples fall within the calculated stddev of the mean.
 500      */
 501     if (info->lin_countoffset > 1) {
 502          info->lin_cache_stddev =
 503              sqrt((info->lin_sumoffset2 -
 504                  ((info->lin_sumoffset * info->lin_sumoffset /
 505                    info->lin_countoffset))) /
 506                  (info->lin_countoffset - 1.0));
 507     }
 508
 509     /*
 510      * Save the most recent offset, we might use it in the future.
 511      * Save the frequency correction (we might scale the slope later so
 512      * we have a separate field for the actual frequency correction in
 513      * seconds per second).
 514      */
 515     info->lin_cache_offset = offset;
 516     info->lin_cache_freq = info->lin_cache_slope;
 517
 518     if (debug_level >= 4) {
 519         logdebug(4, "iter=%2d time=%7.3f off=%.6f uoff=%.6f",
 520             (int)info->lin_count,
 521             time_axis, offset, uncorrected_offset);
 522         if (info->lin_count > 1) {
 523             logdebug(4, " slope %7.6f"
 524                             " yint %3.2f corr %7.6f freq_ppm %4.2f",
 525                 info->lin_cache_slope,
 526                 info->lin_cache_yint,
 527                 info->lin_cache_corr,
 528                 info->lin_cache_freq * 1000000.0);
 529         }
 530         if (info->lin_countoffset > 1) {
 531             logdebug(4, " stddev %7.6f", info->lin_cache_stddev);
 532         } else if (calc_offset_correction == 0) {
 533             /* cannot calculate offset correction due to prior correction */
 534             logdebug(4, " offset_ignored");
 535         }
 536         logdebug(4, "\n");
 537     }
 538 }
 539
 540 /*
 541  * Reset the linear regression data.  The info structure will not again be
 542  * a candidate for frequency or offset correction until sufficient data
 543  * has been accumulated to make a decision.
 544  */
 545 void
 546 lin_reset(server_info_t info)
 547 {
 548     server_info_t scan;
 549
 550     info->lin_count = 0;
 551     info->lin_sumx = 0;
 552     info->lin_sumy = 0;
 553     info->lin_sumxy = 0;
 554     info->lin_sumx2 = 0;
 555     info->lin_sumy2 = 0;
 556
 557     info->lin_countoffset = 0;
 558     info->lin_sumoffset = 0;
 559     info->lin_sumoffset2 = 0;
 560
 561     info->lin_cache_slope = 0;
 562     info->lin_cache_yint = 0;
 563     info->lin_cache_corr = 0;
 564     info->lin_cache_offset = 0;
 565     info->lin_cache_freq = 0;
 566
 567     /*
 568      * Destroy any additional alternative regressions.
 569      */
 570     while ((scan = info->altinfo) != NULL) {
 571         info->altinfo = scan->altinfo;
 572         free(scan);
 573     }
 574 }
 575
 576 /*
 577  * Sometimes we want to clean out the offset calculations without
 578  * destroying the linear regression used to figure out the frequency
 579  * correction.  This usually occurs whenever we issue an offset
 580  * adjustment to the system, which invalidates any offset data accumulated
 581  * up to that point.
 582  */
 583 void
 584 lin_resetalloffsets(struct server_info **info_ary, int count)
 585 {
 586     server_info_t info;
 587     int i;
 588
 589     for (i = 0; i < count; ++i) {
 590         for (info = info_ary[i]; info; info = info->altinfo)
 591             lin_resetoffsets(info);
 592     }
 593 }
 594
 595 void
 596 lin_resetoffsets(server_info_t info)
 597 {
 598     info->lin_countoffset = 0;
 599     info->lin_sumoffset = 0;
 600     info->lin_sumoffset2 = 0;
 601 }
 602