usr.sbin/dntpd/client.c

   1 /*
   2  * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
#include "defs.h"

#include <time.h>
  36
  37 static int client_insane(struct server_info **, int, server_info_t);
  38
  39 void
  40 client_init(void)
  41 {
  42 }
  43
  44 int
  45 client_main(struct server_info **info_ary, int count)
  46 {
  47     struct server_info *best_off;
  48     struct server_info *best_freq;
  49     double last_freq;
  50     double freq;
  51     double offset;
  52     int calc_offset_correction;
  53     int didreconnect;
  54     int i;
  55     int insane;
  56
  57     last_freq = 0.0;
  58
  59     for (;;) {
  60         /*
  61          * Subtract the interval from poll_sleep and poll the client
  62          * if it reaches 0.
  63          *
  64          * Because we do not compensate for offset corrections which are
  65          * in progress, we cannot accumulate data for an offset correction
  66          * while a prior correction is still being worked through by the
  67          * system.
  68          */
  69         calc_offset_correction = !sysntp_offset_correction_is_running();
  70         for (i = 0; i < count; ++i)
  71             client_poll(info_ary[i], min_sleep_opt, calc_offset_correction);
  72
  73         /*
  74          * Find the best client (or synthesize one).  A different client
  75          * can be chosen for frequency and offset.  Note in particular
  76          * that offset counters and averaging code gets reset when an
  77          * offset correction is made (otherwise the averaging history will
  78          * cause later corrections to overshoot).
  79          *
  80          * The regression used to calculate the frequency is a much
  81          * longer-term entity and is NOT reset, so it is still possible
  82          * for the offset correction code to make minor adjustments to
  83          * the frequency if it so desires.
  84          *
  85          * client_check may replace the server_info pointer with a new
  86          * one.
  87          */
  88         best_off = NULL;
  89         best_freq = NULL;
  90         for (i = 0; i < count; ++i)
  91             client_check(&info_ary[i], &best_off, &best_freq);
  92
  93         /*
  94          * Check for server insanity.  In large NNTP pools some servers
  95          * may just be dead wrong, but report that they are right.
  96          */
  97         if (best_off) {
  98             insane = client_insane(info_ary, count, best_off);
  99             if (insane > 0) {
 100                 /*
 101                  * best_off meets the quorum requirements and is good
 102                  * (keep best_off)
 103                  */
 104                 best_off->server_insane = 0;
 105             } else if (insane == 0) {
 106                 /*
 107                  * best_off is probably good, but we do not have enough
 108                  * servers reporting yet to meet the quorum requirements.
 109                  */
 110                 best_off = NULL;
 111             } else {
 112                 /*
 113                  * best_off is ugly, mark the server as being insane for
 114                  * 60 minutes.
 115                  */
 116                 best_off->server_insane = 60 * 60;
 117                 logdebuginfo(best_off, 1,
 118                              "excessive offset deviation, mapping out\n");
 119                 best_off = NULL;
 120             }
 121         }
 122
 123         /*
 124          * Offset correction.
 125          */
 126         if (best_off) {
 127             offset = best_off->lin_sumoffset / best_off->lin_countoffset;
 128             lin_resetalloffsets(info_ary, count);
 129             if (offset < -COURSE_OFFSET_CORRECTION_LIMIT ||
 130                 offset > COURSE_OFFSET_CORRECTION_LIMIT ||
 131                 quickset_opt
 132             ) {
 133                 freq = sysntp_correct_course_offset(offset);
 134                 quickset_opt = 0;
 135             } else {
 136                 freq = sysntp_correct_offset(offset);
 137             }
 138         } else {
 139             freq = 0.0;
 140         }
 141
 142         /*
 143          * Frequency correction (throw away minor freq adjusts from the
 144          * offset code if we can't do a frequency correction here).  Do
 145          * not reissue if it hasn't changed from the last issued correction.
 146          */
 147         if (best_freq) {
 148             freq += best_freq->lin_cache_freq;
 149             if (last_freq != freq) {
 150                 sysntp_correct_freq(freq);
 151                 last_freq = freq;
 152             }
 153         }
 154
 155         /*
 156          * This function is responsible for managing the polling mode and
 157          * figures out how long we should sleep.
 158          */
 159         didreconnect = 0;
 160         for (i = 0; i < count; ++i)
 161             client_manage_polling_mode(info_ary[i], &didreconnect);
 162         if (didreconnect)
 163             client_check_duplicate_ips(info_ary, count);
 164
 165         /*
 166          * Polling loop sleep.
 167          */
 168         usleep(min_sleep_opt * 1000000 + random() % 500000);
 169     }
 170 }
 171
 172 void
 173 client_poll(server_info_t info, int poll_interval, int calc_offset_correction)
 174 {
 175     struct timeval rtv;
 176     struct timeval ltv;
 177     struct timeval lbtv;
 178     double offset;
 179
 180     /*
 181      * Adjust the insane-server countdown
 182      */
 183     if (info->server_insane > poll_interval)
 184         info->server_insane -= poll_interval;
 185     else
 186         info->server_insane = 0;
 187
 188     /*
 189      * By default we always poll.  If the polling interval comes under
 190      * active management the poll_sleep will be non-zero.
 191      */
 192     if (info->poll_sleep > poll_interval) {
 193         info->poll_sleep -= poll_interval;
 194         return;
 195     }
 196     info->poll_sleep = 0;
 197
 198     /*
 199      * If the client isn't open don't mess with the poll_failed count
 200      * or anything else.  We are left in the init or startup phase.
 201      */
 202     if (info->fd < 0) {
 203         if (info->poll_failed < 0x7FFFFFFF)
 204             ++info->poll_failed;
 205         return;
 206     }
 207
 208     logdebuginfo(info, 4, "poll, ");
 209     if (udp_ntptimereq(info->fd, &rtv, &ltv, &lbtv) < 0) {
 210         ++info->poll_failed;
 211         logdebug(4, "no response (%d failures in a row)\n", info->poll_failed);
 212         if (info->poll_failed == POLL_FAIL_RESET) {
 213             if (info->lin_count != 0) {
 214                 logdebuginfo(info, 4, "resetting regression due to failures\n");
 215             }
 216             lin_reset(info);
 217         }
 218         return;
 219     }
 220
 221     /*
 222      * Successful query.  Update polling info for the polling mode manager.
 223      */
 224     ++info->poll_count;
 225     info->poll_failed = 0;
 226
 227     /*
 228      * Figure out the offset (the difference between the reported
 229      * time and our current time) for linear regression purposes.
 230      */
 231     offset = tv_delta_double(&rtv, &ltv);
 232
 233     while (info) {
 234         /*
 235          * Linear regression
 236          */
 237         if (debug_level >= 4) {
 238             struct tm *tp;
 239             char buf[64];
 240             time_t t;
 241
 242             t = rtv.tv_sec;
 243             tp = localtime(&t);
 244             strftime(buf, sizeof(buf), "%d-%b-%Y %H:%M:%S", tp);
 245             logdebug(4, "%s.%03ld ", buf, rtv.tv_usec / 1000);
 246         }
 247         lin_regress(info, &ltv, &lbtv, offset, calc_offset_correction);
 248         info = info->altinfo;
 249         if (info && debug_level >= 4) {
 250             logdebug(4, "%*.*s: poll, ",
 251                 (int)strlen(info->target),
 252                 (int)strlen(info->target), "(alt)");
 253         }
 254     }
 255 }
 256
 257 /*
 258  * Find the best client (or synthesize a fake info structure to return).
 259  * We can find separate best clients for offset and frequency.
 260  */
 261 void
 262 client_check(struct server_info **checkp,
 263              struct server_info **best_off,
 264              struct server_info **best_freq)
 265 {
 266     struct server_info *check = *checkp;
 267     struct server_info *info;
 268     int min_samples;
 269
 270     /*
 271      * Start an alternate linear regression once our current one
 272      * has passed a certain point.
 273      */
 274     if (check->lin_count >= LIN_RESTART / 2 && check->altinfo == NULL) {
 275         info = malloc(sizeof(*info));
 276         assert(info != NULL);
 277         /* note: check->altinfo is NULL as of the bcopy */
 278         bcopy(check, info, sizeof(*info));
 279         check->altinfo = info;
 280         lin_reset(info);
 281     }
 282
 283     /*
 284      * Replace our current linear regression with the alternate once
 285      * the current one has hit its limit (beyond a certain point the
 286      * linear regression starts to work against us, preventing us from
 287      * reacting to changing conditions).
 288      *
 289      * Report any significant change in the offset or ppm.
 290      */
 291     if (check->lin_count >= LIN_RESTART) {
 292         if ((info = check->altinfo) && info->lin_count >= LIN_RESTART / 2) {
 293             double freq_diff;
 294
 295             freq_diff = info->lin_cache_freq - check->lin_cache_freq;
 296             logdebuginfo(info, 4, "Switching to alternate, Frequency "
 297                          "difference is %6.3f ppm\n",
 298                          freq_diff * 1.0E+6);
 299             *checkp = info;
 300             free(check);
 301             check = info;
 302         }
 303     }
 304
 305     /*
 306      * BEST CLIENT FOR FREQUENCY CORRECTION:
 307      *
 308      * Frequency corrections get better the longer the time separation
 309      * between samples.
 310      *
 311      *  8 samples and a correlation > 0.99, or
 312      * 16 samples and a correlation > 0.96
 313      */
 314     info = *best_freq;
 315     if ((check->lin_count >= 8 && fabs(check->lin_cache_corr) >= 0.99) ||
 316         (check->lin_count >= 16 && fabs(check->lin_cache_corr) >= 0.96)
 317     ) {
 318         if (info == NULL ||
 319             fabs(check->lin_cache_corr) > fabs(info->lin_cache_corr)
 320         ) {
 321             info = check;
 322             *best_freq = info;
 323         }
 324
 325     }
 326
 327     /*
 328      * BEST CLIENT FOR OFFSET CORRECTION:
 329      *
 330      * Use the standard-deviation and require at least 4 samples.  An
 331      * offset correction is valid if the standard deviation is less then
 332      * the average offset divided by 4.
 333      *
 334      * If we are in maintainance mode, require 8 samples instead of 4.
 335      * Offset corrections get better with more samples.  This reduces
 336      * ping-pong effects that can occur with a small number of samples.
 337      *
 338      * Servers marked as being insane are not allowed
 339      */
 340     info = *best_off;
 341     if (info && info->poll_mode == POLL_MAINTAIN)
 342         min_samples = 8;
 343     else
 344         min_samples = 4;
 345     if (check->lin_countoffset >= min_samples &&
 346         (check->lin_cache_stddev <
 347          fabs(check->lin_sumoffset / check->lin_countoffset / 4)) &&
 348         check->server_insane == 0
 349      ) {
 350         if (info == NULL ||
 351             fabs(check->lin_cache_stddev) < fabs(info->lin_cache_stddev)
 352         ) {
 353             info = check;
 354             *best_off = info;
 355         }
 356     }
 357 }
 358
 359 /*
 360  * Actively manage the polling interval.  Note that the poll_* fields are
 361  * always transfered to the alternate regression when the check code replaces
 362  * the current regression with a new one.
 363  *
 364  * This routine is called from the main loop for each base info structure.
 365  * The polling mode applies to all alternates so we do not have to iterate
 366  * through the alt's.
 367  */
 368 void
 369 client_manage_polling_mode(struct server_info *info, int *didreconnect)
 370 {
 371     /*
 372      * Permanently failed servers are ignored.
 373      */
 374     if (info->server_state == -2)
 375         return;
 376
 377     /*
 378      * Our polling interval has not yet passed.
 379      */
 380     if (info->poll_sleep)
 381         return;
 382
 383     /*
 384      * Standard polling mode progression
 385      */
 386     switch(info->poll_mode) {
 387     case POLL_FIXED:
 388         /*
 389          * Initial state after connect or when a reconnect is required.
 390          */
 391         if (info->fd < 0) {
 392             logdebuginfo(info, 2, "polling mode INIT, relookup & reconnect\n");
 393             reconnect_server(info);
 394             *didreconnect = 1;
 395             if (info->fd < 0) {
 396                 if (info->poll_failed >= POLL_RECOVERY_RESTART * 5)
 397                     info->poll_sleep = max_sleep_opt;
 398                 else if (info->poll_failed >= POLL_RECOVERY_RESTART)
 399                     info->poll_sleep = nom_sleep_opt;
 400                 else
 401                     info->poll_sleep = min_sleep_opt;
 402                 break;
 403             }
 404
 405             /*
 406              * Transition the server to the DNS lookup successful state.
 407              * Note that the server state does not transition out of
 408              * lookup successful if we relookup after a packet failure
 409              * so the message is printed only once, usually.
 410              */
 411             client_setserverstate(info, 0, "DNS lookup success");
 412
 413             /*
 414              * If we've failed many times switch to the startup state but
 415              * do not fall through into it.  break the switch and a single
 416              * poll will be made after the nominal polling interval.
 417              */
 418             if (info->poll_failed >= POLL_RECOVERY_RESTART * 5) {
 419                 logdebuginfo(info, 2, "polling mode INIT->STARTUP (very slow)\n");
 420                 info->poll_mode = POLL_STARTUP;
 421                 info->poll_sleep = max_sleep_opt;
 422                 info->poll_count = 0;
 423                 break;
 424             } else if (info->poll_failed >= POLL_RECOVERY_RESTART) {
 425                 logdebuginfo(info, 2, "polling mode INIT->STARTUP (slow)\n");
 426                 info->poll_mode = POLL_STARTUP;
 427                 info->poll_count = 0;
 428                 break;
 429             }
 430         }
 431
 432         /*
 433          * Fall through to the startup state.
 434          */
 435         info->poll_mode = POLL_STARTUP;
 436         logdebuginfo(info, 2, "polling mode INIT->STARTUP (normal)\n");
 437         /* fall through */
 438     case POLL_STARTUP:
 439         /*
 440          * Transition to a FAILED state if too many poll failures occured.
 441          */
 442         if (info->poll_failed >= POLL_FAIL_RESET) {
 443             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 444             info->poll_mode = POLL_FAILED;
 445             info->poll_count = 0;
 446             break;
 447         }
 448
 449         /*
 450          * Transition the server to operational.  Do a number of minimum
 451          * interval polls to try to get a good offset calculation quickly.
 452          */
 453         if (info->poll_count)
 454             client_setserverstate(info, 1, "connected ok");
 455         if (info->poll_count < POLL_STARTUP_MAX) {
 456             info->poll_sleep = min_sleep_opt;
 457             break;
 458         }
 459
 460         /*
 461          * Once we've got our polls fall through to aquisition mode to
 462          * do aquisition processing.
 463          */
 464         info->poll_mode = POLL_ACQUIRE;
 465         info->poll_count = 0;
 466         logdebuginfo(info, 2, "polling mode STARTUP->ACQUIRE\n");
 467         /* fall through */
 468     case POLL_ACQUIRE:
 469         /*
 470          * Transition to a FAILED state if too many poll failures occured.
 471          */
 472         if (info->poll_failed >= POLL_FAIL_RESET) {
 473             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 474             info->poll_mode = POLL_FAILED;
 475             info->poll_count = 0;
 476             break;
 477         }
 478
 479         /*
 480          * Acquisition mode using the nominal timeout.  We do not shift
 481          * to maintainance mode unless the correlation is at least 0.90
 482          */
 483         if (info->poll_count < POLL_ACQUIRE_MAX ||
 484             info->lin_count < 8 ||
 485             fabs(info->lin_cache_corr) < 0.85
 486         ) {
 487             if (info->poll_count >= POLL_ACQUIRE_MAX &&
 488                 info->lin_count == LIN_RESTART - 2
 489             ) {
 490                 logdebuginfo(info, 2,
 491                     "WARNING: Unable to shift this source to "
 492                     "maintenance mode.  Target correlation is aweful\n");
 493             }
 494             break;
 495         }
 496         info->poll_mode = POLL_MAINTAIN;
 497         info->poll_count = 0;
 498         logdebuginfo(info, 2, "polling mode ACQUIRE->MAINTAIN\n");
 499         /* fall through */
 500     case POLL_MAINTAIN:
 501         /*
 502          * Transition to a FAILED state if too many poll failures occured.
 503          */
 504         if (info->poll_failed >= POLL_FAIL_RESET) {
 505             logdebuginfo(info, 2, "polling mode STARTUP->FAILED\n");
 506             info->poll_mode = POLL_FAILED;
 507             info->poll_count = 0;
 508             break;
 509         }
 510
 511         /*
 512          * Maintaince mode, max polling interval.
 513          *
 514          * Transition back to acquisition mode if we are unable to maintain
 515          * this mode due to the correlation going bad.
 516          */
 517         if (info->lin_count >= LIN_RESTART / 2 &&
 518             fabs(info->lin_cache_corr) < 0.70
 519         ) {
 520             logdebuginfo(info, 2,
 521                 "polling mode MAINTAIN->ACQUIRE.  Unable to maintain\n"
 522                 "the maintenance mode because the correlation went"
 523                 " bad!\n");
 524             info->poll_mode = POLL_ACQUIRE;
 525             info->poll_count = 0;
 526             break;
 527         }
 528         info->poll_sleep = max_sleep_opt;
 529         break;
 530     case POLL_FAILED:
 531         /*
 532          * We have a communications failure.  A late recovery is possible
 533          * if we enter this state with a good poll.
 534          */
 535         if (info->poll_count != 0) {
 536             logdebuginfo(info, 2, "polling mode FAILED->ACQUIRE\n");
 537             if (info->poll_failed >= POLL_FAIL_RESET)
 538                 info->poll_mode = POLL_STARTUP;
 539             else
 540                 info->poll_mode = POLL_ACQUIRE;
 541             /* do not reset poll_count */
 542             break;
 543         }
 544
 545         /*
 546          * If we have been failed too long, disconnect from the server
 547          * and start us all over again.  Note that the failed count is not
 548          * reset to 0.
 549          */
 550         if (info->poll_failed >= POLL_RECOVERY_RESTART) {
 551             logdebuginfo(info, 2, "polling mode FAILED->INIT\n");
 552             client_setserverstate(info, 0, "FAILED");
 553             disconnect_server(info);
 554             info->poll_mode = POLL_FIXED;
 555             break;
 556         }
 557         break;
 558     }
 559
 560     /*
 561      * If the above state machine has not set a polling interval, set a
 562      * nominal polling interval.
 563      */
 564     if (info->poll_sleep == 0)
 565         info->poll_sleep = nom_sleep_opt;
 566 }
 567
 568 /*
 569  * Look for duplicate IP addresses.  This is done very inoften, so we do
 570  * not use a particularly efficient algorithm.
 571  *
 572  * Only reconnect a client which has not done its initial poll.
 573  */
 574 void
 575 client_check_duplicate_ips(struct server_info **info_ary, int count)
 576 {
 577     server_info_t info1;
 578     server_info_t info2;
 579     int tries;
 580     int i;
 581     int j;
 582
 583     for (i = 0; i < count; ++i) {
 584         info1 = info_ary[i];
 585         if (info1->fd < 0 || info1->server_state != 0)
 586             continue;
 587         for (tries = 0; tries < 10; ++tries) {
 588             for (j = 0; j < count; ++j) {
 589                 info2 = info_ary[j];
 590                 if (i == j || info2->fd < 0)
 591                     continue;
 592                 if (info1->fd < 0 || /* info1 was lost in previous reconnect */
 593                     strcmp(info1->ipstr, info2->ipstr) == 0) {
 594                     reconnect_server(info1);
 595                     break;
 596                 }
 597             }
 598             if (j == count)
 599                 break;
 600         }
 601         if (tries == 10) {
 602             disconnect_server(info1);
 603             client_setserverstate(info1, -2,
 604                                   "permanently disabling duplicate server");
 605         }
 606     }
 607 }
 608
 609 /*
 610  * Calculate whether the server pointed to by *bestp is insane or not.
 611  * For some reason some servers in e.g. the ntp pool are sometimes an hour
 612  * off.  If we have at least three servers in the pool require that a
 613  * quorum agree that the current best server's offset is reasonable.
 614  *
 615  * Allow +/- 0.5 seconds of error for now (settable with option).
 616  *
 617  * Returns -1 if insane, 0 if not enough samples, and 1 if ok
 618  */
 619 static
 620 int
 621 client_insane(struct server_info **info_ary, int count, server_info_t best)
 622 {
 623     server_info_t info;
 624     double best_offset;
 625     double info_offset;
 626     int good;
 627     int bad;
 628     int skip;
 629     int quorum;
 630     int i;
 631
 632     /*
 633      * If only one ntp server we cannot check to see if it is insane
 634      */
 635     if (count < 2)
 636             return(1);
 637     best_offset = best->lin_sumoffset / best->lin_countoffset;
 638
 639     /*
 640      * Calculated the quorum.  Do not count permanently failed servers
 641      * in the calculation.
 642      *
 643      * adjusted count   quorum
 644      *   2                2
 645      *   3                2
 646      *   4                3
 647      *   5                3
 648      */
 649     quorum = count;
 650     for (i = 0; i < count; ++i) {
 651         info = info_ary[i];
 652         if (info->server_state == -2)
 653             --quorum;
 654     }
 655
 656     quorum = quorum / 2 + 1;
 657     good = 0;
 658     bad = 0;
 659     skip = 0;
 660
 661     /*
 662      * Find the good, the bad, and the ugly.  We need at least four samples
 663      * and a stddev within the deviation being checked to count a server
 664      * in the calculation.
 665      */
 666     for (i = 0; i < count; ++i) {
 667         info = info_ary[i];
 668         if (info->lin_countoffset < 4 ||
 669             info->lin_cache_stddev > insane_deviation
 670         ) {
 671             ++skip;
 672             continue;
 673         }
 674
 675         info_offset = info->lin_sumoffset / info->lin_countoffset;
 676         info_offset -= best_offset;
 677         if (info_offset < -insane_deviation || info_offset > insane_deviation)
 678                 ++bad;
 679         else
 680                 ++good;
 681     }
 682
 683     /*
 684      * Did we meet our quorum?
 685      */
 686     logdebuginfo(best, 5, "insanecheck good=%d bad=%d skip=%d "
 687                           "quorum=%d (allowed=%-+8.6f)\n",
 688                  good, bad, skip, quorum, insane_deviation);
 689     if (good >= quorum)
 690         return(1);
 691     if (good + skip >= quorum)
 692         return(0);
 693     return(-1);
 694 }
 695
 696 /*
 697  * Linear regression.
 698  *
 699  *      ltv     local time as of when the offset error was calculated between
 700  *              local time and remote time.
 701  *
 702  *      lbtv    base time as of when local time was obtained.  Used to
 703  *              calculate the cumulative corrections made to the system's
 704  *              real time clock so we can de-correct the offset for the
 705  *              linear regression.
 706  *
 707  * X is the time axis, in seconds.
 708  * Y is the uncorrected offset, in seconds.
 709  */
 710 void
 711 lin_regress(server_info_t info, struct timeval *ltv, struct timeval *lbtv,
 712             double offset, int calc_offset_correction)
 713 {
 714     double time_axis;
 715     double uncorrected_offset;
 716
 717     /*
 718      * De-correcting the offset:
 719      *
 720      *  The passed offset is (our_real_time - remote_real_time).  To remove
 721      *  corrections from our_real_time we take the difference in the basetime
 722      *  (new_base_time - old_base_time) and subtract that from the offset.
 723      *  That is, if the basetime goesup, the uncorrected offset goes down.
 724      */
 725     if (info->lin_count == 0) {
 726         info->lin_tv = *ltv;
 727         info->lin_btv = *lbtv;
 728         time_axis = 0;
 729         uncorrected_offset = offset;
 730     } else {
 731         time_axis = tv_delta_double(&info->lin_tv, ltv);
 732         uncorrected_offset = offset - tv_delta_double(&info->lin_btv, lbtv);
 733     }
 734
 735     /*
 736      * We have to use the uncorrected offset for frequency calculations.
 737      */
 738     ++info->lin_count;
 739     info->lin_sumx += time_axis;
 740     info->lin_sumx2 += time_axis * time_axis;
 741     info->lin_sumy += uncorrected_offset;
 742     info->lin_sumy2 += uncorrected_offset * uncorrected_offset;
 743     info->lin_sumxy += time_axis * uncorrected_offset;
 744
 745     /*
 746      * We have to use the corrected offset for offset calculations.
 747      */
 748     if (calc_offset_correction) {
 749         ++info->lin_countoffset;
 750         info->lin_sumoffset += offset;
 751         info->lin_sumoffset2 += offset * offset;
 752     }
 753
 754     /*
 755      * Calculate various derived values.   This gets us slope, y-intercept,
 756      * and correlation from the linear regression.
 757      */
 758     if (info->lin_count > 1) {
 759         info->lin_cache_slope =
 760          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 761          (info->lin_count * info->lin_sumx2 - info->lin_sumx * info->lin_sumx);
 762
 763         info->lin_cache_yint =
 764          (info->lin_sumy - info->lin_cache_slope * info->lin_sumx) /
 765          (info->lin_count);
 766
 767         info->lin_cache_corr =
 768          (info->lin_count * info->lin_sumxy - info->lin_sumx * info->lin_sumy) /
 769          sqrt((info->lin_count * info->lin_sumx2 -
 770                       info->lin_sumx * info->lin_sumx) *
 771              (info->lin_count * info->lin_sumy2 -
 772                       info->lin_sumy * info->lin_sumy)
 773          );
 774     }
 775
 776     /*
 777      * Calculate more derived values.  This gets us the standard-deviation
 778      * of offsets.  The standard deviation approximately means that 68%
 779      * of the samples fall within the calculated stddev of the mean.
 780      */
 781     if (info->lin_countoffset > 1) {
 782          info->lin_cache_stddev =
 783              sqrt((info->lin_sumoffset2 -
 784                  ((info->lin_sumoffset * info->lin_sumoffset /
 785                    info->lin_countoffset))) /
 786                  (info->lin_countoffset - 1.0));
 787     }
 788
 789     /*
 790      * Save the most recent offset, we might use it in the future.
 791      * Save the frequency correction (we might scale the slope later so
 792      * we have a separate field for the actual frequency correction in
 793      * seconds per second).
 794      */
 795     info->lin_cache_offset = offset;
 796     info->lin_cache_freq = info->lin_cache_slope;
 797
 798     if (debug_level >= 4) {
 799         logdebuginfo(info, 4, "iter=%2d time=%7.3f off=%+.6f uoff=%+.6f",
 800             (int)info->lin_count,
 801             time_axis, offset, uncorrected_offset);
 802         if (info->lin_count > 1) {
 803             logdebug(4, " slope %+7.6f"
 804                             " yint %+3.2f corr %+7.6f freq_ppm %+4.2f",
 805                 info->lin_cache_slope,
 806                 info->lin_cache_yint,
 807                 info->lin_cache_corr,
 808                 info->lin_cache_freq * 1000000.0);
 809         }
 810         if (info->lin_countoffset > 1) {
 811             logdebug(4, " stddev %7.6f", info->lin_cache_stddev);
 812         } else if (calc_offset_correction == 0) {
 813             /* cannot calculate offset correction due to prior correction */
 814             logdebug(4, " offset_ignored");
 815         }
 816         logdebug(4, "\n");
 817     }
 818 }
 819
 820 /*
 821  * Reset the linear regression data.  The info structure will not again be
 822  * a candidate for frequency or offset correction until sufficient data
 823  * has been accumulated to make a decision.
 824  */
 825 void
 826 lin_reset(server_info_t info)
 827 {
 828     server_info_t scan;
 829
 830     info->lin_count = 0;
 831     info->lin_sumx = 0;
 832     info->lin_sumy = 0;
 833     info->lin_sumxy = 0;
 834     info->lin_sumx2 = 0;
 835     info->lin_sumy2 = 0;
 836
 837     info->lin_countoffset = 0;
 838     info->lin_sumoffset = 0;
 839     info->lin_sumoffset2 = 0;
 840
 841     info->lin_cache_slope = 0;
 842     info->lin_cache_yint = 0;
 843     info->lin_cache_corr = 0;
 844     info->lin_cache_offset = 0;
 845     info->lin_cache_freq = 0;
 846
 847     /*
 848      * Destroy any additional alternative regressions.
 849      */
 850     while ((scan = info->altinfo) != NULL) {
 851         info->altinfo = scan->altinfo;
 852         free(scan);
 853     }
 854 }
 855
 856 /*
 857  * Sometimes we want to clean out the offset calculations without
 858  * destroying the linear regression used to figure out the frequency
 859  * correction.  This usually occurs whenever we issue an offset
 860  * adjustment to the system, which invalidates any offset data accumulated
 861  * up to that point.
 862  */
 863 void
 864 lin_resetalloffsets(struct server_info **info_ary, int count)
 865 {
 866     server_info_t info;
 867     int i;
 868
 869     for (i = 0; i < count; ++i) {
 870         for (info = info_ary[i]; info; info = info->altinfo)
 871             lin_resetoffsets(info);
 872     }
 873 }
 874
 875 void
 876 lin_resetoffsets(server_info_t info)
 877 {
 878     info->lin_countoffset = 0;
 879     info->lin_sumoffset = 0;
 880     info->lin_sumoffset2 = 0;
 881 }
 882
 883 void
 884 client_setserverstate(server_info_t info, int state, const char *str)
 885 {
 886     if (info->server_state != state) {
 887         info->server_state = state;
 888         logdebuginfo(info, 1, "%s\n", str);
 889     }
 890 }
 891