Merge OpenZFS support in to HEAD.
[freebsd.git] / cddl / usr.sbin / zfsd / zfsd.cc
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016  Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32
33 /**
34  * \file zfsd.cc
35  *
36  * The ZFS daemon consumes kernel devdctl(4) event data via devd(8)'s
37  * unix domain socket in order to react to system changes that impact
38  * the function of ZFS storage pools.  The goal of this daemon is to
39  * provide similar functionality to the Solaris ZFS Diagnostic Engine
40  * (zfs-diagnosis), the Solaris ZFS fault handler (zfs-retire), and
41  * the Solaris ZFS vdev insertion agent (zfs-mod sysevent handler).
42  */
43
44 #include <sys/cdefs.h>
45 #include <sys/byteorder.h>
46 #include <sys/param.h>
47 #include <sys/fs/zfs.h>
48
49 #include <err.h>
50 #include <fcntl.h>
51 #include <libgeom.h>
52 #include <libutil.h>
53 #include <poll.h>
54 #include <syslog.h>
55
56 #include <libzfs.h>
57
58 #include <list>
59 #include <map>
60 #include <string>
61
62 #include <devdctl/guid.h>
63 #include <devdctl/event.h>
64 #include <devdctl/event_factory.h>
65 #include <devdctl/exception.h>
66 #include <devdctl/consumer.h>
67
68 #include "callout.h"
69 #include "vdev_iterator.h"
70 #include "zfsd_event.h"
71 #include "case_file.h"
72 #include "vdev.h"
73 #include "vdev_iterator.h"
74 #include "zfsd.h"
75 #include "zfsd_exception.h"
76 #include "zpool_list.h"
77
78 __FBSDID("$FreeBSD$");
79
80 /*================================== Macros ==================================*/
/**
 * Number of elements in a statically sized array.
 *
 * The argument is fully parenthesized so that compound expressions
 * expand correctly.  Only valid for true arrays, not pointers.
 */
#define NUM_ELEMENTS(x) (sizeof(x) / sizeof((x)[0]))
82
83 /*============================ Namespace Control =============================*/
84 using DevdCtl::Event;
85 using DevdCtl::EventFactory;
86 using DevdCtl::EventList;
87
88 /*================================ Global Data ===============================*/
89 int              g_debug = 0;
90 libzfs_handle_t *g_zfsHandle;
91
92 /*--------------------------------- ZfsDaemon --------------------------------*/
93 //- ZfsDaemon Static Private Data ----------------------------------------------
94 ZfsDaemon           *ZfsDaemon::s_theZfsDaemon;
95 bool                 ZfsDaemon::s_logCaseFiles;
96 bool                 ZfsDaemon::s_terminateEventLoop;
97 char                 ZfsDaemon::s_pidFilePath[] = "/var/run/zfsd.pid";
98 pidfh               *ZfsDaemon::s_pidFH;
99 int                  ZfsDaemon::s_signalPipeFD[2];
100 bool                 ZfsDaemon::s_systemRescanRequested(false);
101 EventFactory::Record ZfsDaemon::s_registryEntries[] =
102 {
103         { Event::NOTIFY, "GEOM",  &GeomEvent::Builder },
104         { Event::NOTIFY, "ZFS",   &ZfsEvent::Builder }
105 };
106
107 //- ZfsDaemon Static Public Methods --------------------------------------------
108 ZfsDaemon &
109 ZfsDaemon::Get()
110 {
111         return (*s_theZfsDaemon);
112 }
113
114 void
115 ZfsDaemon::WakeEventLoop()
116 {
117         write(s_signalPipeFD[1], "+", 1);
118 }
119
120 void
121 ZfsDaemon::RequestSystemRescan()
122 {
123         s_systemRescanRequested = true;
124         ZfsDaemon::WakeEventLoop();
125 }
126
127 void
128 ZfsDaemon::Run()
129 {
130         ZfsDaemon daemon;
131
132         while (s_terminateEventLoop == false) {
133
134                 try {
135                         daemon.DisconnectFromDevd();
136
137                         if (daemon.ConnectToDevd() == false) {
138                                 sleep(30);
139                                 continue;
140                         }
141
142                         daemon.DetectMissedEvents();
143
144                         daemon.EventLoop();
145
146                 } catch (const DevdCtl::Exception &exp) {
147                         exp.Log();
148                 }
149         }
150
151         daemon.DisconnectFromDevd();
152 }
153
154 //- ZfsDaemon Private Methods --------------------------------------------------
155 ZfsDaemon::ZfsDaemon()
156  : Consumer(/*defBuilder*/NULL, s_registryEntries,
157             NUM_ELEMENTS(s_registryEntries))
158 {
159         if (s_theZfsDaemon != NULL)
160                 errx(1, "Multiple ZfsDaemon instances created. Exiting");
161
162         s_theZfsDaemon = this;
163
164         if (pipe(s_signalPipeFD) != 0)
165                 errx(1, "Unable to allocate signal pipe. Exiting");
166
167         if (fcntl(s_signalPipeFD[0], F_SETFL, O_NONBLOCK) == -1)
168                 errx(1, "Unable to set pipe as non-blocking. Exiting");
169
170         if (fcntl(s_signalPipeFD[1], F_SETFL, O_NONBLOCK) == -1)
171                 errx(1, "Unable to set pipe as non-blocking. Exiting");
172
173         signal(SIGHUP,  ZfsDaemon::RescanSignalHandler);
174         signal(SIGINFO, ZfsDaemon::InfoSignalHandler);
175         signal(SIGINT,  ZfsDaemon::QuitSignalHandler);
176         signal(SIGTERM, ZfsDaemon::QuitSignalHandler);
177         signal(SIGUSR1, ZfsDaemon::RescanSignalHandler);
178
179         g_zfsHandle = libzfs_init();
180         if (g_zfsHandle == NULL)
181                 errx(1, "Unable to initialize ZFS library. Exiting");
182
183         Callout::Init();
184         InitializeSyslog();
185         OpenPIDFile();
186
187         if (g_debug == 0)
188                 daemon(0, 0);
189
190         UpdatePIDFile();
191 }
192
193 ZfsDaemon::~ZfsDaemon()
194 {
195         PurgeCaseFiles();
196         ClosePIDFile();
197 }
198
199 void
200 ZfsDaemon::PurgeCaseFiles()
201 {
202         CaseFile::PurgeAll();
203 }
204
205 bool
206 ZfsDaemon::VdevAddCaseFile(Vdev &vdev, void *cbArg)
207 {
208         if (vdev.State() != VDEV_STATE_HEALTHY)
209                 CaseFile::Create(vdev);
210
211         return (/*break early*/false);
212 }
213
214 void
215 ZfsDaemon::BuildCaseFiles()
216 {
217         ZpoolList zpl;
218         ZpoolList::iterator pool;
219
220         /* Add CaseFiles for vdevs with issues. */
221         for (pool = zpl.begin(); pool != zpl.end(); pool++)
222                 VdevIterator(*pool).Each(VdevAddCaseFile, NULL);
223
224         /* De-serialize any saved cases. */
225         CaseFile::DeSerialize();
226
227         /* Simulate config_sync events to force CaseFile reevaluation */
228         for (pool = zpl.begin(); pool != zpl.end(); pool++) {
229                 char evString[160];
230                 Event *event;
231                 nvlist_t *config;
232                 uint64_t poolGUID;
233                 const char *poolname;
234
235                 poolname = zpool_get_name(*pool);
236                 config = zpool_get_config(*pool, NULL);
237                 if (config == NULL) {
238                         syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not "
239                             "find pool config for pool %s", poolname);
240                         continue;
241                 }
242                 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
243                                      &poolGUID) != 0) {
244                         syslog(LOG_ERR, "ZFSDaemon::BuildCaseFiles: Could not "
245                             "find pool guid for pool %s", poolname);
246                         continue;
247                 }
248
249                 
250                 snprintf(evString, 160, "!system=ZFS subsystem=ZFS "
251                     "type=misc.fs.zfs.config_sync sub_type=synthesized "
252                     "pool_name=%s pool_guid=%" PRIu64 "\n", poolname, poolGUID);
253                 event = Event::CreateEvent(GetFactory(), string(evString));
254                 if (event != NULL) {
255                         event->Process();
256                         delete event;
257                 }
258         }
259 }
260
261 void
262 ZfsDaemon::RescanSystem()
263 {
264         struct gmesh      mesh;
265         struct gclass    *mp;
266         struct ggeom     *gp;
267         struct gprovider *pp;
268         int               result;
269
270         /*
271          * The devdctl system doesn't replay events for new consumers
272          * of the interface.  Emit manufactured DEVFS arrival events
273          * for any devices that already before we started or during
274          * periods where we've lost our connection to devd.
275          */
276         result = geom_gettree(&mesh);
277         if (result != 0) {
278                 syslog(LOG_ERR, "ZfsDaemon::RescanSystem: "
279                        "geom_gettree faild with error %d\n", result);
280                 return;
281         }
282
283         const string evStart("!system=DEVFS subsystem=CDEV type=CREATE "
284                              "sub_type=synthesized cdev=");
285         LIST_FOREACH(mp, &mesh.lg_class, lg_class) {
286                 LIST_FOREACH(gp, &mp->lg_geom, lg_geom) {
287                         LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
288                                 Event *event;
289
290                                 string evString(evStart + pp->lg_name + "\n");
291                                 event = Event::CreateEvent(GetFactory(),
292                                                            evString);
293                                 if (event != NULL) {
294                                         if (event->Process())
295                                                 SaveEvent(*event);
296                                         delete event;
297                                 }
298                         }
299                 }
300         }
301         geom_deletetree(&mesh);
302 }
303
304 void
305 ZfsDaemon::DetectMissedEvents()
306 {
307         do {
308                 PurgeCaseFiles();
309
310                 /*
311                  * Discard any events waiting for us.  We don't know
312                  * if they still apply to the current state of the
313                  * system.
314                  */
315                 FlushEvents();
316
317                 BuildCaseFiles();
318
319                 /*
320                  * If the system state has changed during our
321                  * interrogation, start over.
322                  */
323         } while (s_terminateEventLoop == false && EventsPending());
324
325         RescanSystem();
326 }
327
328 void
329 ZfsDaemon::EventLoop()
330 {
331         while (s_terminateEventLoop == false) {
332                 struct pollfd fds[2];
333                 int           result;
334
335                 if (s_logCaseFiles == true) {
336                         EventList::iterator event(m_unconsumedEvents.begin());
337                         s_logCaseFiles = false;
338                         CaseFile::LogAll();
339                         while (event != m_unconsumedEvents.end())
340                                 (*event++)->Log(LOG_INFO);
341                 }
342
343                 Callout::ExpireCallouts();
344
345                 /* Wait for data. */
346                 fds[0].fd      = m_devdSockFD;
347                 fds[0].events  = POLLIN;
348                 fds[0].revents = 0;
349                 fds[1].fd      = s_signalPipeFD[0];
350                 fds[1].events  = POLLIN;
351                 fds[1].revents = 0;
352                 result = poll(fds, NUM_ELEMENTS(fds), /*timeout*/INFTIM);
353                 if (result == -1) {
354                         if (errno == EINTR)
355                                 continue;
356                         else
357                                 err(1, "Polling for devd events failed");
358                 } else if (result == 0) {
359                         errx(1, "Unexpected result of 0 from poll. Exiting");
360                 }
361
362                 if ((fds[0].revents & POLLIN) != 0)
363                         ProcessEvents();
364
365                 if ((fds[1].revents & POLLIN) != 0) {
366                         static char discardBuf[128];
367
368                         /*
369                          * This pipe exists just to close the signal
370                          * race.  Its contents are of no interest to
371                          * us, but we must ensure that future signals
372                          * have space in the pipe to write.
373                          */
374                         while (read(s_signalPipeFD[0], discardBuf,
375                                     sizeof(discardBuf)) > 0)
376                                 ;
377                 }
378
379                 if (s_systemRescanRequested == true) {
380                         s_systemRescanRequested = false;
381                         syslog(LOG_INFO, "System Rescan request processed.");
382                         RescanSystem();
383                 }
384
385                 if ((fds[0].revents & POLLERR) != 0) {
386                         syslog(LOG_INFO, "POLLERROR detected on devd socket.");
387                         break;
388                 }
389
390                 if ((fds[0].revents & POLLHUP) != 0) {
391                         syslog(LOG_INFO, "POLLHUP detected on devd socket.");
392                         break;
393                 }
394         }
395 }
//- ZfsDaemon Static Private Methods -------------------------------------------
397 void
398 ZfsDaemon::InfoSignalHandler(int)
399 {
400         s_logCaseFiles = true;
401         ZfsDaemon::WakeEventLoop();
402 }
403
404 void
405 ZfsDaemon::RescanSignalHandler(int)
406 {
407         RequestSystemRescan();
408 }
409
410 void
411 ZfsDaemon::QuitSignalHandler(int)
412 {
413         s_terminateEventLoop = true;
414         ZfsDaemon::WakeEventLoop();
415 }
416
417 void
418 ZfsDaemon::OpenPIDFile()
419 {
420         pid_t otherPID;
421
422         s_pidFH = pidfile_open(s_pidFilePath, 0600, &otherPID);
423         if (s_pidFH == NULL) {
424                 if (errno == EEXIST)
425                         errx(1, "already running as PID %d. Exiting", otherPID);
426                 warn("cannot open PID file");
427         }
428 }
429
430 void
431 ZfsDaemon::UpdatePIDFile()
432 {
433         if (s_pidFH != NULL)
434                 pidfile_write(s_pidFH);
435 }
436
437 void
438 ZfsDaemon::ClosePIDFile()
439 {
440         if (s_pidFH != NULL)
441                 pidfile_remove(s_pidFH);
442 }
443
444 void
445 ZfsDaemon::InitializeSyslog()
446 {
447         openlog("zfsd", LOG_NDELAY, LOG_DAEMON);
448 }
449