2 * Copyright (c) 1985, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
/* SCCS and RCS identification strings for this file. */
36 static char sccsid[] = "@(#)master.c 8.1 (Berkeley) 6/6/93";
38 static const char rcsid[] =
39 "$FreeBSD: src/usr.sbin/timed/timed/master.c,v 1.6 1999/08/28 01:20:17 peter Exp $";
44 #include <sys/types.h>
45 #include <sys/times.h>
48 #include <sys/schedctl.h>
51 #include "pathnames.h"
/* Objects declared extern here; their definitions are not in the visible part of this file. */
53 extern int measure_delta;
54 extern jmp_buf jmpenv;
/* slvcount is compared against NHOSTS before a host is added and bounds the host table dump. */
59 static int slvcount; /* slaves listening to our clock */
61 static void mchgdate __P((struct tsp *));
/*
 * NOTE(review): the two logwtmp prototypes below are incompatible with each
 * other.  They are presumably alternatives selected by a preprocessor
 * conditional on lines that are not visible here -- confirm.
 */
64 extern void logwtmp __P((struct timeval *, struct timeval *));
66 extern void logwtmp __P((char *, char *, char *));
70 * The main function of `master' is to periodically compute the differences
71 * (deltas) between its clock and the clocks of the slaves, to compute the
72 * network average delta, and to send to the slaves the differences between
73 * their individual deltas and the network delta.
74 * While waiting, it receives messages from the slaves (i.e. requests for
75 * master's name, remote requests to set the network time, ...), and
76 * takes the appropriate action.
/*
 * Local state and body of the master loop.
 * NOTE(review): the function header and many interior lines (braces, case
 * labels, some statements) are not visible here, so the comments added
 * below describe only what the visible lines themselves show.
 */
85 struct timeval wait, ntime;
87 struct tsp *msg, *answer, to;
89 struct sockaddr_in taddr;
90 char tname[MAXHOSTNAMELEN];
/* Announce the master role through syslog and on the fd stream. */
94 syslog(LOG_NOTICE, "This machine is master");
96 fprintf(fd, "This machine is master\n");
97 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
98 if (ntp->status == MASTER)
/* Schedule the first poll 3 seconds after the current time. */
101 (void)gettimeofday(&ntime, 0);
102 pollingtime = ntime.tv_sec+3;
108 /* Process all outstanding messages before spending the long time necessary
109 * to update all timers.
112 (void)gettimeofday(&ntime, 0);
113 wait.tv_sec = pollingtime - ntime.tv_sec;
117 msg = readmsg(TSP_ANY, ANYADDR, &wait, 0);
119 (void)gettimeofday(&ntime, 0);
120 if (ntime.tv_sec >= pollingtime) {
121 pollingtime = ntime.tv_sec + SAMPLEINTVL;
124 /* If a bogus master told us to quit, we can have decided to ignore a
125 * network. Therefore, periodically try to take over everything.
127 polls = (polls + 1) % POLLRATE;
128 if (0 == polls && nignorednets > 0) {
129 trace_msg("Looking for nets to re-master\n");
130 for (ntp = nettab; ntp; ntp = ntp->next) {
131 if (ntp->status == IGNORE
132 || ntp->status == NOMASTER) {
134 if (ntp->status == MASTER) {
139 if (ntp->status == MASTER
140 && --ntp->quit_count < 0)
149 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
150 to.tsp_type = TSP_LOOP;
151 to.tsp_vers = TSPVERSION;
152 to.tsp_seq = sequence++;
153 to.tsp_hopcnt = MAX_HOPCNT;
154 (void)strcpy(to.tsp_name, hostname);
156 if (sendto(sock, (char *)&to,
157 sizeof(struct tsp), 0,
158 (struct sockaddr*)&ntp->dest_addr,
159 sizeof(ntp->dest_addr)) < 0) {
160 trace_sendto_err(ntp->dest_addr.sin_addr);
167 switch (msg->tsp_type) {
178 * XXX check to see it is from ourself
181 (void)cftime(newdate, "%D %T", &msg->tsp_time.tv_sec);
183 tsp_time_sec = msg->tsp_time.tv_sec;
184 (void)strcpy(newdate, ctime(&tsp_time_sec));
186 if (!good_host_name(msg->tsp_name)) {
188 "attempted date change by %s to %s",
189 msg->tsp_name, newdate);
195 (void)gettimeofday(&ntime, 0);
196 pollingtime = ntime.tv_sec + SAMPLEINTVL;
200 if (!fromnet || fromnet->status != MASTER)
203 (void)cftime(newdate, "%D %T", &msg->tsp_time.tv_sec);
205 tsp_time_sec = msg->tsp_time.tv_sec;
206 (void)strcpy(newdate, ctime(&tsp_time_sec));
208 htp = findhost(msg->tsp_name);
211 "attempted SET DATEREQ by uncontrolled %s to %s",
212 msg->tsp_name, newdate);
215 if (htp->seq == msg->tsp_seq)
217 htp->seq = msg->tsp_seq;
220 "attempted SET DATEREQ by untrusted %s to %s",
221 msg->tsp_name, newdate);
227 (void)gettimeofday(&ntime, 0);
228 pollingtime = ntime.tv_sec + SAMPLEINTVL;
232 xmit(TSP_ACK, msg->tsp_seq, &from);
243 traceoff("Tracing ended at %s\n");
249 if (fromnet->status == MASTER) {
251 (void)addmach(msg->tsp_name, &from,fromnet);
254 (void)strcpy(tname, msg->tsp_name);
255 to.tsp_type = TSP_QUIT;
256 (void)strcpy(to.tsp_name, hostname);
257 answer = acksend(&to, &taddr, tname,
259 if (answer == NULL) {
260 syslog(LOG_ERR, "election error by %s",
267 * After a network partition, there can be
268 * more than one master: the first slave to
269 * come up will notify us of the situation here.
271 if (!fromnet || fromnet->status != MASTER)
273 (void)strcpy(to.tsp_name, hostname);
275 /* The other master often gets into the same state,
276 * with boring results if we stay at it forever.
278 ntp = fromnet; /* (acksend() can leave fromnet=0) */
/*
 * Up to 3 attempts: a TSP_RESOLVE message is handed to acksend() for the
 * destination address of the net, with TSP_MASTERACK among the arguments
 * (presumably the expected reply type -- confirm).
 */
279 for (i = 0; i < 3; i++) {
280 to.tsp_type = TSP_RESOLVE;
281 (void)strcpy(to.tsp_name, hostname);
282 answer = acksend(&to, &ntp->dest_addr,
283 ANYADDR, TSP_MASTERACK,
/* Register the host named in the answer, then send it a TSP_QUIT. */
287 htp = addmach(answer->tsp_name,&from,ntp);
288 to.tsp_type = TSP_QUIT;
289 msg = acksend(&to, &htp->addr, htp->name,
290 TSP_ACK, 0, htp->noanswer);
293 "no response from %s to CONFLICT-QUIT",
302 if (!fromnet || fromnet->status != MASTER)
305 * do not want to call synch() while waiting
/* Move the next poll to SAMPLEINTVL seconds after the current time. */
308 (void)gettimeofday(&ntime, (struct timezone *)0);
309 pollingtime = ntime.tv_sec + SAMPLEINTVL;
313 doquit(msg); /* become a slave */
317 if (!fromnet || fromnet->status != MASTER
318 || !strcmp(msg->tsp_name, hostname))
321 * We should not have received this from a net
322 * we are master on. There must be two masters.
/* Register the sender as a host on fromnet, then send it a TSP_QUIT. */
324 htp = addmach(msg->tsp_name, &from,fromnet);
325 to.tsp_type = TSP_QUIT;
326 (void)strcpy(to.tsp_name, hostname);
327 answer = acksend(&to, &htp->addr, htp->name,
331 "loop breakage: no reply from %s=%s to QUIT",
332 htp->name, inet_ntoa(htp->addr.sin_addr));
339 "\tnets = %d, masters = %d, slaves = %d, ignored = %d\n",
340 nnets, nmasternets, nslavenets, nignorednets);
349 fprintf(fd, "garbage message: ");
360 * change the system date on the master
/*
 * Locals and body of the date-change handler.
 * NOTE(review): the function header is not visible here; presumably this is
 * mchgdate(), whose prototype appears near the top of the file -- confirm.
 */
366 char tname[MAXHOSTNAMELEN];
368 struct timeval otime, ntime;
/*
 * Keep a private copy of the name carried in the message.
 * NOTE(review): strcpy() is unbounded; this relies on msg->tsp_name being
 * NUL-terminated and fitting in MAXHOSTNAMELEN bytes -- confirm.
 */
370 (void)strcpy(tname, msg->tsp_name);
/* Answer with TSP_DATEACK, passing along the sequence number of the request. */
372 xmit(TSP_DATEACK, msg->tsp_seq, &from);
/* Save the current date string (presumably for the log message at the end -- confirm). */
374 (void)strcpy(olddate, date());
376 /* adjust time for residence on the queue */
377 (void)gettimeofday(&otime, 0);
378 adj_msg_time(msg,&otime);
/*
 * ntime receives the result of timevalsub() on the requested time and the
 * current time.  When its seconds field lies strictly between -MAXADJ and
 * MAXADJ the difference is handed to synch().
 */
380 timevalsub(&ntime, &msg->tsp_time, &otime);
381 if (ntime.tv_sec < MAXADJ && ntime.tv_sec > -MAXADJ) {
383 * do not change the clock if we can adjust it
386 synch(tvtomsround(ntime));
/*
 * NOTE(review): two ways of stepping the clock are visible below: a checked
 * settimeofday() followed by logwtmp() with two timevals, and an unchecked
 * settimeofday() bracketed by logwtmp() calls with string arguments.  They
 * are presumably alternatives chosen by a preprocessor conditional on lines
 * that are not visible here -- confirm.
 */
389 if (0 > settimeofday(&msg->tsp_time, 0)) {
390 syslog(LOG_ERR, "settimeofday(): %m");
392 logwtmp(&otime, &msg->tsp_time);
394 logwtmp("|", "date", "");
395 (void)settimeofday(&msg->tsp_time, 0);
396 logwtmp("{", "date", "");
401 syslog(LOG_NOTICE, "date changed by %s from %s",
407 * synchronize all of the slaves
/*
 * Locals and body of the measurement pass over the host list.
 * NOTE(review): the function header is not visible here; presumably this is
 * synch() -- confirm.
 */
415 struct timeval check, stop, wait;
422 fprintf(fd, "measurements starting at %s\n", date());
423 (void)gettimeofday(&check, 0);
425 /* run fast to get good time */
/* The value returned by schedctl() is kept in pri and handed back to schedctl() further down. */
426 pri = schedctl(NDPRI,0,NDPHIMIN);
428 syslog(LOG_ERR, "schedctl(): %m");
/*
 * Visit each host from self.l_fwd until the walk returns to &self.  The
 * visible measure() calls use 500 as first argument when noanswer is
 * non-zero and 3000 in the other call (the line separating the two calls
 * is not visible).
 */
430 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
431 if (htp->noanswer != 0) {
432 measure_status = measure(500, 100,
436 measure_status = measure(3000, 100,
440 if (measure_status != GOOD) {
441 /* The slave did not respond. We have
442 * just wasted lots of time on it.
444 htp->delta = HOSTDOWN;
445 if (++htp->noanswer >= LOSTHOST) {
448 "purging %s for not answering ICMP\n",
455 htp->delta = measure_delta;
457 (void)gettimeofday(&stop, 0);
458 timevalsub(&stop, &stop, &check);
459 if (stop.tv_sec >= 1) {
463 * ack messages periodically
467 if (0 != readmsg(TSP_TRACEON,ANYADDR,
470 (void)gettimeofday(&check, 0);
475 (void)schedctl(NDPRI,0,pri);
478 fprintf(fd, "measurements finished at %s\n", date());
480 if (!(status & SLAVE)) {
482 mydelta = networkdelta();
487 if (trace && (mydelta != 0 || (status & SLAVE)))
488 fprintf(fd,"local correction of %ld ms.\n", mydelta);
493 * sends the time to each slave after the master
494 * has received the command to set the network time
503 /* Do not listen to the consensus after forcing the time. This is because
504 * the consensus takes a while to reach the time we are dictating.
507 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
508 to.tsp_type = TSP_SETTIME;
509 (void)strcpy(to.tsp_name, hostname);
510 (void)gettimeofday(&to.tsp_time, 0);
511 answer = acksend(&to, &htp->addr, htp->name,
512 TSP_ACK, 0, htp->noanswer);
514 /* If the client does not respond, then we have
515 * just wasted lots of time on it.
518 "no reply to SETTIME from %s", htp->name);
519 if (++htp->noanswer >= LOSTHOST) {
522 "purging %s for not answering",
536 static time_t next_time;
543 if (!fd) /* quit if tracing already off */
/*
 * Rate limit for the host table dump: this_time + delta is compared with
 * next_time, and next_time is then set CLK_TCK ticks past the times()
 * value just read.
 * NOTE(review): the statement guarded by the comparison is not visible.
 */
546 this_time = times(&tm);
547 if (this_time + delta < next_time)
549 next_time = this_time + CLK_TCK;
551 fprintf(fd, "host table: %d entries at %s\n", slvcount, date());
/*
 * Print slvcount names following l_fwd.  l is the length of one name plus
 * one for the leading blank, and is checked against an 80 column line.
 */
554 for (i = 1; i <= slvcount; i++, htp = htp->l_fwd) {
555 l = strlen(htp->name) + 1;
556 if (length+l >= 80) {
561 fprintf(fd, " %s", htp->name);
/*
 * newhost_hash: home slot computed by the most recent lookup; addmach()
 * reads it right after calling findhost().
 * lasthfree: advanced by addmach() while it looks for an unused entry.
 */
567 static struct hosttbl *newhost_hash;
568 static struct hosttbl *lasthfree = &hosttbl[0];
571 struct hosttbl * /* answer or 0 */
/*
 * At most the first 8 characters of name are visited before the home slot
 * is chosen as hosttbl[j % NHOSTS].
 * NOTE(review): the line that accumulates j is not visible here.
 */
580 for (p = name, i = 0; i < 8 && *p != '\0'; i++, p++)
582 newhost_hash = &hosttbl[j % NHOSTS];
/*
 * Probe entries until the walk comes back to the home slot, testing each
 * for an empty name and for a name equal to the one requested.
 * NOTE(review): the statements taken on each test are not visible here.
 */
585 if (htp->name[0] == '\0')
588 if (!strcmp(name, htp->name))
591 } while (htp != newhost_hash);
596 * add a host to the list of controlled machines if not already there
/*
 * K&R style definition; name is looked up with findhost() and, when a new
 * entry is needed, copied into it.
 * NOTE(review): the free-entry scan wraps when lasthfree passes
 * &hosttbl[NHOSTS] and restarts at &hosttbl[1]; that stays in bounds only
 * if hosttbl has at least NHOSTS+1 elements -- confirm against its
 * declaration.
 * NOTE(review): strncpy() into ret->name does not NUL-terminate when name
 * is sizeof(ret->name) bytes or longer -- confirm termination is ensured
 * elsewhere.
 */
599 addmach(name, addr, ntp)
601 struct sockaddr_in *addr;
604 struct hosttbl *ret, *p, *b, *f;
/* findhost() also leaves the home slot for name in newhost_hash. */
606 ret = findhost(name);
/* Table full: report on fd and through syslog, then longjmp(jmpenv, 2). */
608 if (slvcount >= NHOSTS) {
610 fprintf(fd, "no more slots in host table\n");
613 syslog(LOG_ERR, "no more slots in host table");
615 longjmp(jmpenv, 2); /* give up and be a slave */
618 /* if our home hash slot is occupied, find a free entry
621 if (newhost_hash->name[0] != '\0') {
624 if (++lasthfree > &hosttbl[NHOSTS])
625 lasthfree = &hosttbl[1];
626 } while (ret->name[0] != '\0');
628 if (!newhost_hash->head) {
629 /* Move an interloper using our home. Use
630 * scratch pointers in case the new head is
631 * pointing to itself.
633 f = newhost_hash->h_fwd;
634 b = newhost_hash->h_bak;
637 f = newhost_hash->l_fwd;
638 b = newhost_hash->l_bak;
641 bcopy(newhost_hash,ret,sizeof(*ret));
647 /* link to an existing chain in our home
650 p = newhost_hash->h_bak;
651 ret->h_fwd = newhost_hash;
654 newhost_hash->h_bak = ret;
664 (void)strncpy(ret->name, name, sizeof(ret->name));
665 ret->good = good_host_name(name);
667 ret->l_bak = self.l_bak;
668 self.l_bak->l_fwd = ret;
676 ret->noanswer = (ret->noanswer != 0);
679 /* need to clear sequence number anyhow */
685 * remove the machine with the given index in the host table.
/*
 * Locals and body of the host removal routine; htp is the entry being
 * removed (its name is written to fd below).
 * NOTE(review): the function header is not visible here.
 */
691 struct hosttbl *lprv, *hnxt, *f, *b;
694 fprintf(fd, "remove %s\n", htp->name);
696 /* get out of the lists */
/*
 * Unlink htp from both doubly linked lists (l_fwd and l_bak, h_fwd and
 * h_bak), keeping the previous list entry in lprv and the next hash chain
 * entry in hnxt.
 */
697 htp->l_fwd->l_bak = lprv = htp->l_bak;
698 htp->l_bak->l_fwd = htp->l_fwd;
699 htp->h_fwd->h_bak = htp->h_bak;
700 htp->h_bak->h_fwd = hnxt = htp->h_fwd;
702 /* If we are in the home slot, pull up the chain */
703 if (htp->head && hnxt != htp) {
707 /* Use scratch pointers in case the new head is pointing to
719 bcopy(hnxt, htp, sizeof(*htp));
725 lasthfree->name[0] = '\0';
726 lasthfree->h_fwd = 0;
727 lasthfree->l_fwd = 0;
735 * Remove all the machines from the host table that exist on the given
736 * network. This is called when a master transitions to a slave on a
/*
 * Walk the host list from self.l_fwd until it returns to &self.
 * NOTE(review): the loop body is not visible here.
 */
747 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
/* Hand a TSP_MASTERUP message to xmit() for the destination address of net. */
759 xmit(TSP_MASTERUP, 0, &net->dest_addr);
762 * Do not tell new slaves our time for a while. This ensures
763 * we do not tell them to start using our time, before we have
764 * found a good master.
/*
 * Stamp net->slvwait with the current time; slvwait is compared with the
 * clock elsewhere in this file before a TSP_SETTIME goes to a new slave.
 */
766 (void)gettimeofday(&net->slvwait, 0);
/*
 * Locals and body of the handler for a newly announced slave: the sender of
 * msg is registered with addmach() and may then be sent the current time.
 * NOTE(review): the function header is not visible here.
 */
774 struct tsp *answer, to;
/* Test for an unknown net or a net on which this machine is not MASTER (guarded statement not visible). */
777 if (!fromnet || fromnet->status != MASTER)
780 htp = addmach(msg->tsp_name, &from,fromnet);
781 htp->seq = msg->tsp_seq;
786 * If we are stable, send our time to the slave.
787 * Do not go crazy if the date has been changed.
/*
 * TSP_SETTIME is sent when at least 3 seconds have passed since
 * fromnet->slvwait, or when the clock now reads earlier than slvwait.
 */
789 (void)gettimeofday(&now, 0);
790 if (now.tv_sec >= fromnet->slvwait.tv_sec+3
791 || now.tv_sec < fromnet->slvwait.tv_sec) {
792 to.tsp_type = TSP_SETTIME;
793 (void)strcpy(to.tsp_name, hostname);
794 (void)gettimeofday(&to.tsp_time, 0);
795 answer = acksend(&to, &htp->addr,
802 "no reply to initial SETTIME from %s",
/* The host is marked with noanswer = LOSTHOST (presumably on the no-reply path -- confirm). */
804 htp->noanswer = LOSTHOST;
811 * react to a TSP_QUIT:
/*
 * Body of the TSP_QUIT handler.
 * NOTE(review): the function header, the else lines and the closing braces
 * are not visible here, so branch structure is partly inferred.  Visible
 * effects when this machine is MASTER on fromnet: a QUIT from a name that
 * fails good_host_name() is logged, with suppress() and quit_count = 1 the
 * first time (quit_count <= 0) and quit_count = 2 plus status = NOMASTER
 * on the path logged as "twice"; status = SLAVE is also assigned, and
 * longjmp(jmpenv, 2) leaves the master role.
 */
817 if (fromnet->status == MASTER) {
818 if (!good_host_name(msg->tsp_name)) {
819 if (fromnet->quit_count <= 0) {
820 syslog(LOG_NOTICE,"untrusted %s told us QUIT",
822 suppress(&from, msg->tsp_name, fromnet);
823 fromnet->quit_count = 1;
826 syslog(LOG_NOTICE, "untrusted %s told us QUIT twice",
828 fromnet->quit_count = 2;
829 fromnet->status = NOMASTER;
831 fromnet->status = SLAVE;
834 longjmp(jmpenv, 2); /* give up and be a slave */
837 if (!good_host_name(msg->tsp_name)) {
838 syslog(LOG_NOTICE, "untrusted %s told us QUIT",
840 fromnet->quit_count = 2;
/*
 * Open the file named by _PATH_TIMEDLOG for writing and record when tracing
 * started.
 * NOTE(review): no check of the fopen() result is visible before fd is
 * used; it may be on a line that is not shown -- confirm.
 */
849 fd = fopen(_PATH_TIMEDLOG, "w");
854 fprintf(fd,"Tracing started at %s\n", date());
/*
 * msg is used as the fprintf() format and date() is its only argument, so
 * msg must contain exactly one %s conversion; the caller visible in this
 * file passes such a string.
 */
871 fprintf(fd, msg, date());
/*
 * Record a clock change through pututline() and by appending to WTMP_FILE.
 * otime and ntime are the times before and after the change (the caller in
 * this file passes the old time first); both are rounded to whole seconds.
 * NOTE(review): the statement taken when both round to the same second is
 * not visible here.
 */
886 logwtmp(otime, ntime)
887 struct timeval *otime, *ntime;
889 static struct utmp wtmp[2] = {
890 {"","",OTIME_MSG,0,OLD_TIME,0,0,0},
891 {"","",NTIME_MSG,0,NEW_TIME,0,0,0}
893 static char *wtmpfile = WTMP_FILE;
/* Round to the nearest second: half a second of microseconds is added before dividing. */
896 wtmp[0].ut_time = otime->tv_sec + (otime->tv_usec + 500000) / 1000000;
897 wtmp[1].ut_time = ntime->tv_sec + (ntime->tv_usec + 500000) / 1000000;
898 if (wtmp[0].ut_time == wtmp[1].ut_time)
/* Both records go through pututline() first. */
902 (void)pututline(&wtmp[0]);
903 (void)pututline(&wtmp[1]);
/*
 * Then both records are appended to wtmpfile in one write().
 * NOTE(review): the result of write() is discarded, so a short or failed
 * write goes unnoticed.
 */
905 if ((f = open(wtmpfile, O_WRONLY|O_APPEND)) >= 0) {
906 (void) write(f, (char *)wtmp, sizeof(wtmp));