2 * Copyright (c) 1985, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)master.c 8.1 (Berkeley) 6/6/93
34 * $FreeBSD: src/usr.sbin/timed/timed/master.c,v 1.6 1999/08/28 01:20:17 peter Exp $
39 #include <sys/types.h>
40 #include <sys/times.h>
43 #include "pathnames.h"
45 extern int measure_delta;
46 extern jmp_buf jmpenv;
51 static int slvcount; /* slaves listening to our clock */
53 static void mchgdate(struct tsp *);
56 * The main function of `master' is to periodically compute the differences
57 * (deltas) between its clock and the clocks of the slaves, to compute the
58 * network average delta, and to send to the slaves the differences between
59 * their individual deltas and the network delta.
60 * While waiting, it receives messages from the slaves (i.e. requests for
61 * master's name, remote requests to set the network time, ...), and
62 * takes the appropriate action.
71 struct timeval wait, ntime;
73 struct tsp *msg, *answer, to;
75 struct sockaddr_in taddr;
76 char tname[MAXHOSTNAMELEN];
80 syslog(LOG_NOTICE, "This machine is master");
82 fprintf(fd, "This machine is master\n");
83 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
84 if (ntp->status == MASTER)
87 gettimeofday(&ntime, 0);
88 pollingtime = ntime.tv_sec+3;
94 /* Process all outstanding messages before spending the long time necessary
95 * to update all timers.
98 gettimeofday(&ntime, 0);
99 wait.tv_sec = pollingtime - ntime.tv_sec;
103 msg = readmsg(TSP_ANY, ANYADDR, &wait, 0);
105 gettimeofday(&ntime, 0);
106 if (ntime.tv_sec >= pollingtime) {
107 pollingtime = ntime.tv_sec + SAMPLEINTVL;
110 /* If a bogus master told us to quit, we may have decided to ignore a
111 * network. Therefore, periodically try to take over everything.
113 polls = (polls + 1) % POLLRATE;
114 if (0 == polls && nignorednets > 0) {
115 trace_msg("Looking for nets to re-master\n");
116 for (ntp = nettab; ntp; ntp = ntp->next) {
117 if (ntp->status == IGNORE
118 || ntp->status == NOMASTER) {
120 if (ntp->status == MASTER) {
125 if (ntp->status == MASTER
126 && --ntp->quit_count < 0)
135 for (ntp = nettab; ntp != NULL; ntp = ntp->next) {
136 to.tsp_type = TSP_LOOP;
137 to.tsp_vers = TSPVERSION;
138 to.tsp_seq = sequence++;
139 to.tsp_hopcnt = MAX_HOPCNT;
140 strlcpy(to.tsp_name, hostname,
141 sizeof(to.tsp_name));
143 if (sendto(sock, (char *)&to,
144 sizeof(struct tsp), 0,
145 (struct sockaddr*)&ntp->dest_addr,
146 sizeof(ntp->dest_addr)) < 0) {
147 trace_sendto_err(ntp->dest_addr.sin_addr);
154 switch (msg->tsp_type) {
165 * XXX check to see it is from ourself
167 tsp_time_sec = msg->tsp_time.tv_sec;
168 strlcpy(newdate, ctime(&tsp_time_sec), sizeof(newdate));
169 if (!good_host_name(msg->tsp_name)) {
171 "attempted date change by %s to %s",
172 msg->tsp_name, newdate);
178 gettimeofday(&ntime, 0);
179 pollingtime = ntime.tv_sec + SAMPLEINTVL;
183 if (!fromnet || fromnet->status != MASTER)
185 tsp_time_sec = msg->tsp_time.tv_sec;
186 strlcpy(newdate, ctime(&tsp_time_sec), sizeof(newdate));
187 htp = findhost(msg->tsp_name);
190 "attempted SET DATEREQ by uncontrolled %s to %s",
191 msg->tsp_name, newdate);
194 if (htp->seq == msg->tsp_seq)
196 htp->seq = msg->tsp_seq;
199 "attempted SET DATEREQ by untrusted %s to %s",
200 msg->tsp_name, newdate);
206 gettimeofday(&ntime, 0);
207 pollingtime = ntime.tv_sec + SAMPLEINTVL;
211 xmit(TSP_ACK, msg->tsp_seq, &from);
222 traceoff("Tracing ended at %s\n");
228 if (fromnet->status == MASTER) {
230 addmach(msg->tsp_name, &from,fromnet);
233 strlcpy(tname, msg->tsp_name, sizeof(tname));
234 to.tsp_type = TSP_QUIT;
235 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
236 answer = acksend(&to, &taddr, tname,
238 if (answer == NULL) {
239 syslog(LOG_ERR, "election error by %s",
246 * After a network partition, there can be
247 * more than one master: the first slave to
248 * come up will report the situation here.
250 if (!fromnet || fromnet->status != MASTER)
252 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
254 /* The other master often gets into the same state,
255 * with boring results if we stay at it forever.
257 ntp = fromnet; /* (acksend() can leave fromnet=0) */
258 for (i = 0; i < 3; i++) {
259 to.tsp_type = TSP_RESOLVE;
260 strlcpy(to.tsp_name, hostname,
261 sizeof(to.tsp_name));
262 answer = acksend(&to, &ntp->dest_addr,
263 ANYADDR, TSP_MASTERACK,
267 htp = addmach(answer->tsp_name,&from,ntp);
268 to.tsp_type = TSP_QUIT;
269 msg = acksend(&to, &htp->addr, htp->name,
270 TSP_ACK, 0, htp->noanswer);
273 "no response from %s to CONFLICT-QUIT",
282 if (!fromnet || fromnet->status != MASTER)
285 * do not want to call synch() while waiting
288 gettimeofday(&ntime, NULL);
289 pollingtime = ntime.tv_sec + SAMPLEINTVL;
293 doquit(msg); /* become a slave */
297 if (!fromnet || fromnet->status != MASTER
298 || !strcmp(msg->tsp_name, hostname))
301 * We should not have received this from a net
302 * we are master on. There must be two masters.
304 htp = addmach(msg->tsp_name, &from,fromnet);
305 to.tsp_type = TSP_QUIT;
306 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
307 answer = acksend(&to, &htp->addr, htp->name,
311 "loop breakage: no reply from %s=%s to QUIT",
312 htp->name, inet_ntoa(htp->addr.sin_addr));
319 "\tnets = %d, masters = %d, slaves = %d, ignored = %d\n",
320 nnets, nmasternets, nslavenets, nignorednets);
329 fprintf(fd, "garbage message: ");
340 * change the system date on the master
343 mchgdate(struct tsp *msg)
345 char tname[MAXHOSTNAMELEN];
347 struct timeval otime, ntime;
349 strlcpy(tname, msg->tsp_name, sizeof(tname));
351 xmit(TSP_DATEACK, msg->tsp_seq, &from);
353 strlcpy(olddate, date(), sizeof(olddate));
355 /* adjust time for residence on the queue */
356 gettimeofday(&otime, 0);
357 adj_msg_time(msg,&otime);
359 timevalsub(&ntime, &msg->tsp_time, &otime);
360 if (ntime.tv_sec < MAXADJ && ntime.tv_sec > -MAXADJ) {
362 * do not change the clock if we can adjust it
365 synch(tvtomsround(ntime));
367 logwtmp("|", "date", "");
368 settimeofday(&msg->tsp_time, 0);
369 logwtmp("{", "date", "");
373 syslog(LOG_NOTICE, "date changed by %s from %s",
379 * synchronize all of the slaves
386 struct timeval check, stop, wait;
390 fprintf(fd, "measurements starting at %s\n", date());
391 gettimeofday(&check, 0);
392 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
393 if (htp->noanswer != 0) {
394 measure_status = measure(500, 100,
398 measure_status = measure(3000, 100,
402 if (measure_status != GOOD) {
403 /* The slave did not respond. We have
404 * just wasted lots of time on it.
406 htp->delta = HOSTDOWN;
407 if (++htp->noanswer >= LOSTHOST) {
410 "purging %s for not answering ICMP\n",
417 htp->delta = measure_delta;
419 gettimeofday(&stop, 0);
420 timevalsub(&stop, &stop, &check);
421 if (stop.tv_sec >= 1) {
425 * ack messages periodically
429 if (0 != readmsg(TSP_TRACEON,ANYADDR,
432 gettimeofday(&check, 0);
436 fprintf(fd, "measurements finished at %s\n", date());
438 if (!(status & SLAVE)) {
440 mydelta = networkdelta();
445 if (trace && (mydelta != 0 || (status & SLAVE)))
446 fprintf(fd,"local correction of %ld ms.\n", mydelta);
451 * sends the time to each slave after the master
452 * has received the command to set the network time
461 /* Do not listen to the consensus after forcing the time. This is because
462 * the consensus takes a while to reach the time we are dictating.
465 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
466 to.tsp_type = TSP_SETTIME;
467 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
468 gettimeofday(&to.tsp_time, 0);
469 answer = acksend(&to, &htp->addr, htp->name,
470 TSP_ACK, 0, htp->noanswer);
471 if (answer == NULL) {
472 /* If the client does not respond, then we have
473 * just wasted lots of time on it.
476 "no reply to SETTIME from %s", htp->name);
477 if (++htp->noanswer >= LOSTHOST) {
480 "purging %s for not answering",
493 static time_t next_time;
500 if (!fd) /* quit if tracing already off */
503 this_time = times(&tm);
504 if (this_time + (time_t)delta < next_time)
506 next_time = this_time + CLK_TCK;
508 fprintf(fd, "host table: %d entries at %s\n", slvcount, date());
511 for (i = 1; i <= slvcount; i++, htp = htp->l_fwd) {
512 l = strlen(htp->name) + 1;
513 if (length+l >= 80) {
518 fprintf(fd, " %s", htp->name);
524 static struct hosttbl *newhost_hash;
525 static struct hosttbl *lasthfree = &hosttbl[0];
528 struct hosttbl * /* answer or 0 */
536 for (p = name, i = 0; i < 8 && *p != '\0'; i++, p++)
538 newhost_hash = &hosttbl[j % NHOSTS];
541 if (htp->name[0] == '\0')
544 if (!strcmp(name, htp->name))
547 } while (htp != newhost_hash);
552 * add a host to the list of controlled machines if not already there
555 addmach(char *name, struct sockaddr_in *addr, struct netinfo *ntp)
557 struct hosttbl *ret, *p, *b, *f;
559 ret = findhost(name);
561 if (slvcount >= NHOSTS) {
563 fprintf(fd, "no more slots in host table\n");
566 syslog(LOG_ERR, "no more slots in host table");
568 longjmp(jmpenv, 2); /* give up and be a slave */
571 /* if our home hash slot is occupied, find a free entry
574 if (newhost_hash->name[0] != '\0') {
577 if (++lasthfree > &hosttbl[NHOSTS])
578 lasthfree = &hosttbl[1];
579 } while (ret->name[0] != '\0');
581 if (!newhost_hash->head) {
582 /* Move an interloper using our home. Use
583 * scratch pointers in case the new head is
584 * pointing to itself.
586 f = newhost_hash->h_fwd;
587 b = newhost_hash->h_bak;
590 f = newhost_hash->l_fwd;
591 b = newhost_hash->l_bak;
594 bcopy(newhost_hash,ret,sizeof(*ret));
600 /* link to an existing chain in our home
603 p = newhost_hash->h_bak;
604 ret->h_fwd = newhost_hash;
607 newhost_hash->h_bak = ret;
617 strlcpy(ret->name, name, sizeof(ret->name));
618 ret->good = good_host_name(name);
620 ret->l_bak = self.l_bak;
621 self.l_bak->l_fwd = ret;
629 ret->noanswer = (ret->noanswer != 0);
632 /* need to clear sequence number anyhow */
638 * remove the given machine (host table entry) from the host table.
641 remmach(struct hosttbl *htp)
643 struct hosttbl *lprv, *hnxt, *f, *b;
646 fprintf(fd, "remove %s\n", htp->name);
648 /* get out of the lists */
649 htp->l_fwd->l_bak = lprv = htp->l_bak;
650 htp->l_bak->l_fwd = htp->l_fwd;
651 htp->h_fwd->h_bak = htp->h_bak;
652 htp->h_bak->h_fwd = hnxt = htp->h_fwd;
654 /* If we are in the home slot, pull up the chain */
655 if (htp->head && hnxt != htp) {
659 /* Use scratch pointers in case the new head is pointing to
671 bcopy(hnxt, htp, sizeof(*htp));
677 lasthfree->name[0] = '\0';
678 lasthfree->h_fwd = 0;
679 lasthfree->l_fwd = 0;
687 * Remove all the machines from the host table that exist on the given
688 * network. This is called when a master transitions to a slave on a
692 rmnetmachs(struct netinfo *ntp)
698 for (htp = self.l_fwd; htp != &self; htp = htp->l_fwd) {
707 masterup(struct netinfo *net)
710 xmit(TSP_MASTERUP, 0, &net->dest_addr);
713 * Do not tell new slaves our time for a while. This ensures
714 * we do not tell them to start using our time, before we have
715 * found a good master.
717 gettimeofday(&net->slvwait, 0);
721 newslave(struct tsp *msg)
724 struct tsp *answer, to;
727 if (!fromnet || fromnet->status != MASTER)
730 htp = addmach(msg->tsp_name, &from,fromnet);
731 htp->seq = msg->tsp_seq;
736 * If we are stable, send our time to the slave.
737 * Do not go crazy if the date has been changed.
739 gettimeofday(&now, 0);
740 if (now.tv_sec >= fromnet->slvwait.tv_sec+3
741 || now.tv_sec < fromnet->slvwait.tv_sec) {
742 to.tsp_type = TSP_SETTIME;
743 strlcpy(to.tsp_name, hostname, sizeof(to.tsp_name));
744 gettimeofday(&to.tsp_time, 0);
745 answer = acksend(&to, &htp->addr,
752 "no reply to initial SETTIME from %s",
754 htp->noanswer = LOSTHOST;
761 * react to a TSP_QUIT:
764 doquit(struct tsp *msg)
767 if (fromnet->status == MASTER) {
768 if (!good_host_name(msg->tsp_name)) {
769 if (fromnet->quit_count <= 0) {
770 syslog(LOG_NOTICE,"untrusted %s told us QUIT",
772 suppress(&from, msg->tsp_name, fromnet);
773 fromnet->quit_count = 1;
776 syslog(LOG_NOTICE, "untrusted %s told us QUIT twice",
778 fromnet->quit_count = 2;
779 fromnet->status = NOMASTER;
781 fromnet->status = SLAVE;
784 longjmp(jmpenv, 2); /* give up and be a slave */
787 if (!good_host_name(msg->tsp_name)) {
788 syslog(LOG_NOTICE, "untrusted %s told us QUIT",
790 fromnet->quit_count = 2;
800 fd = fopen(_PATH_TIMEDLOG, "w");
805 fprintf(fd,"Tracing started at %s\n", date());
822 fprintf(fd, msg, date());