743a987498d851f22fdf6d8011b2997b24d80b95
[dragonfly.git] / lib / libevtr / evtr.c
/*
 * Copyright (c) 2009, 2010 Aggelos Economopoulos.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>
#include <sys/stat.h>
#include <sys/tree.h>


#include "evtr.h"
46
/*
 * File-wide constants: size limits, string namespace ids, hash table
 * and record layout parameters, and internal flag bits.
 */
enum {
        MAX_EVHDR_SIZE = PATH_MAX + 200,
        /* string namespaces */
        EVTR_NS_PATH = 0x1,     /* file paths (see evtr_dump_probe) */
        EVTR_NS_FUNC,           /* function names */
        EVTR_NS_DSTR,           /* strings referenced by format data */
        EVTR_NS_MAX,            /* one past the last valid namespace */
        NR_BUCKETS = 1023, /* XXX */
        REC_ALIGN = 8,          /* records are padded to this many bytes */
        REC_BOUNDARY = 1 << 14, /* no record may straddle a multiple of this */
        FILTF_ID = 0x10,        /* filter flag: fmtid has been resolved */
        EVTRF_WR = 0x1,         /* open for writing */
};
60
/*
 * 16-bit identifiers stored in probe_event_header in place of the
 * corresponding strings.
 */
typedef uint16_t fileid_t;
typedef uint16_t funcid_t;
typedef uint16_t fmtid_t;
64
/*
 * Common leading header; the probe, string and fmt headers below all
 * embed it as their first member. Packed, since it is written to the
 * stream verbatim.
 */
struct trace_event_header {
        uint8_t type;   /* record type, e.g. EVTR_TYPE_STR or EVTR_TYPE_FMT */
        uint64_t ts;    /* XXX: this should only be part of probe */
} __attribute__((packed));
69
/*
 * Header of a probe record as written by evtr_dump_probe(); it is
 * followed by datalen bytes of (mangled) format data.
 */
struct probe_event_header {
        struct trace_event_header eh;
        /*
         * For these fields, 0 implies "not available"
         */
        fileid_t file;          /* id in the EVTR_NS_PATH namespace */
        funcid_t caller1;
        funcid_t caller2;
        funcid_t func;          /* id in the EVTR_NS_FUNC namespace */
        uint16_t line;
        fmtid_t fmt;            /* id handed out by evtr_dump_fmt() */
        uint16_t datalen;       /* bytes of format data after the header */
        uint8_t cpu;    /* -1 if n/a */
} __attribute__((packed));
84
/*
 * Header of a string definition record (see evtr_dump_string()); it is
 * followed by len bytes of string data.
 */
struct string_event_header {
        struct trace_event_header eh;
        uint16_t ns;    /* namespace the id belongs to (EVTR_NS_*) */
        uint32_t id;    /* id assigned to the string */
        uint16_t len;   /* length of the string that follows */
} __attribute__((packed));
91
/*
 * Header of a format definition record (see evtr_dump_fmt()); it is
 * followed by subsys_len bytes of subsystem name and fmt_len bytes of
 * format string.
 */
struct fmt_event_header {
        struct trace_event_header eh;
        uint16_t id;            /* id assigned to the subsys+fmt pair */
        uint8_t subsys_len;
        uint8_t fmt_len;
} __attribute__((packed));
98
/*
 * Payload of a cpuinfo record. Unlike the headers above it does not
 * embed a trace_event_header; evtr_dump_cpuinfo() writes a bare type
 * byte before it.
 */
struct cpuinfo_event_header {
        double freq;    /* copied from ev->cpuinfo.freq when dumping */
        uint8_t cpu;    /* copied from ev->cpu when dumping */
} __attribute__((packed));
103
/* One string -> id association, chained per hash bucket. */
struct hashentry {
        const char *str;        /* private copy made by hash_insert() */
        uint16_t id;            /* id assigned at insertion time */
        struct hashentry *next; /* next entry in the same bucket */
};
109
/* Chained hash table mapping strings to small integer ids. */
struct hashtab {
        struct hashentry *buckets[NR_BUCKETS];  /* indexed by hashfunc() */
        uint16_t id;    /* last id handed out; incremented by hash_insert() */
};
114
/* A subsystem name / format string pair. */
struct event_fmt {
        const char *subsys;     /* may be NULL; event_fmt_cmp() treats that as "" */
        const char *fmt;
};
119
/*
 * Queue entry for a filter whose format string has not been resolved
 * to a format id yet (see evtr_resolve_filters()).
 */
struct event_filter_unresolved {
        TAILQ_ENTRY(event_filter_unresolved) link;
        evtr_filter_t filt;     /* the caller's filter; not owned by the entry */
};
124
/* Red-black tree node mapping an integer id to an opaque data pointer. */
struct id_map {
        RB_ENTRY(id_map) rb_node;
        int id;                 /* lookup key */
        const void *data;       /* string or struct event_fmt, depending on the map */
};
130
/* Tree of id_map nodes; shared representation of the string and fmt maps. */
RB_HEAD(id_tree, id_map);
/* id -> string map (one per string namespace when reading). */
struct string_map {
        struct id_tree root;
};
135
/* id -> struct event_fmt map. */
struct fmt_map {
        struct id_tree root;
};
139
/* Tree of struct evtr_thread, keyed by the thread's id field. */
RB_HEAD(thread_tree, evtr_thread);

/* Set of threads seen so far (see thread_map_insert()/thread_map_find()). */
struct thread_map {
        struct thread_tree root;
};
145
/* A registered per-event callback together with its private argument. */
struct event_callback {
        void (*cb)(evtr_event_t, void *data);   /* invoked by evtr_run_callbacks() */
        void *data;     /* this field must be malloc()ed */
};
150
/* Per-cpu state tracked for a stream. */
struct cpu {
        struct evtr_thread *td; /* currently executing thread */
        double freq;
};
155
/*
 * State of one event stream, open either for writing or for reading.
 * The two anonymous unions hold the writer-side or the reader-side
 * string/format bookkeeping.
 */
struct evtr {
        FILE *f;        /* underlying stdio stream */
        int err;        /* errno-style error code, 0 if none */
        int flags;
        char *errmsg;   /* optional error description; see evtr_errmsg() */
        off_t bytes;    /* our own file offset; checked against ftello() */
        union {
                /*
                 * When writing, we keep track of the strings we've
                 * already dumped so we only dump them once.
                 * Paths, function names etc belong to different
                 * namespaces.
                 */
                struct hashtab *strings[EVTR_NS_MAX - 1];
                /*
                 * When reading, we build a map from id to string.
                 * Every id must be defined at the point of use.
                 */
                struct string_map maps[EVTR_NS_MAX - 1];
        };
        union {
                /* same as above, but for subsys+fmt pairs */
                struct fmt_map fmtmap;
                struct hashtab *fmts;
        };
        /*
         * Filters that have a format specified and we
         * need to resolve that to an fmtid
         */
        TAILQ_HEAD(, event_filter_unresolved) unresolved_filtq;
        struct event_callback **cbs;    /* array of ncbs registered callbacks */
        int ncbs;
        struct thread_map threads;      /* threads seen so far */
        struct cpu *cpus;               /* array of ncpus entries; see evtr_cpu() */
        int ncpus;
};
192
/*
 * State of a query over an evtr stream.
 * NOTE(review): none of these fields are used in this part of the
 * file; their exact meaning should be confirmed against the query code.
 */
struct evtr_query {
        evtr_t evtr;
        off_t off;
        evtr_filter_t filt;
        int nfilt;
        int nmatched;
        int ntried;
        void *buf;
        int bufsize;
};
203
/*
 * Debug level; any non-zero value enables printd() output and the
 * checks in validate_string().
 */
static int evtr_debug;

/*
 * Set the library-wide debug level.
 */
void
evtr_set_debug(int lvl)
{
        evtr_debug = lvl;
}
212
/*
 * Generate the red-black tree code for id_tree, including a lookup
 * function keyed on the int id field (id_tree_RB_LOOKUP).
 */
static int id_map_cmp(struct id_map *, struct id_map *);
RB_PROTOTYPE2(id_tree, id_map, rb_node, id_map_cmp, int);
RB_GENERATE2(id_tree, id_map, rb_node, id_map_cmp, int, id);

/*
 * Same for thread_tree, with lookups keyed on the void * id field
 * (thread_tree_RB_LOOKUP).
 */
static int thread_cmp(struct evtr_thread *, struct evtr_thread *);
RB_PROTOTYPE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *);
RB_GENERATE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *, id);
220
/*
 * Debug printf: forwards its arguments to fprintf(stderr, ...) when
 * evtr_debug is non-zero and does nothing otherwise.
 */
#define printd(...)                             \
        do {                                    \
        if (evtr_debug)                         \
                fprintf(stderr, __VA_ARGS__);   \
        } while (0)
226
227 static inline
228 void
229 validate_string(const char *str)
230 {
231         if (!evtr_debug)
232                 return;
233         for (; *str; ++str)
234                 assert(isprint(*str));
235 }
236
237 static
238 void
239 id_tree_free(struct id_tree *root)
240 {
241         struct id_map *v, *n;
242
243         for (v = RB_MIN(id_tree, root); v; v = n) {
244                 n = RB_NEXT(id_tree, root, v);
245                 RB_REMOVE(id_tree, root, v);
246         }
247 }
248
249 static
250 int
251 evtr_register_callback(evtr_t evtr, void (*fn)(evtr_event_t, void *), void *d)
252 {
253         struct event_callback *cb;
254         void *cbs;
255
256         if (!(cb = malloc(sizeof(*cb)))) {
257                 evtr->err = ENOMEM;
258                 return !0;
259         }
260         cb->cb = fn;
261         cb->data = d;
262         if (!(cbs = realloc(evtr->cbs, (++evtr->ncbs) * sizeof(cb)))) {
263                 --evtr->ncbs;
264                 free(cb);
265                 evtr->err = ENOMEM;
266                 return !0;
267         }
268         evtr->cbs = cbs;
269         evtr->cbs[evtr->ncbs - 1] = cb;
270         return 0;
271 }
272
273 static
274 void
275 evtr_deregister_callbacks(evtr_t evtr)
276 {
277         int i;
278
279         for (i = 0; i < evtr->ncbs; ++i) {
280                 free(evtr->cbs[i]);
281         }
282         free(evtr->cbs);
283         evtr->cbs = NULL;
284 }
285
286 static
287 void
288 evtr_run_callbacks(evtr_event_t ev, evtr_t evtr)
289 {
290         struct event_callback *cb;
291         int i;
292
293         for (i = 0; i < evtr->ncbs; ++i) {
294                 cb = evtr->cbs[i];
295                 cb->cb(ev, cb->data);
296         }
297 }
298
299 static
300 struct cpu *
301 evtr_cpu(evtr_t evtr, int c)
302 {
303         if ((c < 0) || (c >= evtr->ncpus))
304                 return NULL;
305         return &evtr->cpus[c];
306 }
307
308 static
309 int
310 parse_format_data(evtr_event_t ev, const char *fmt, ...) __attribute__((format (scanf, 2, 3)));
311 static
312 int
313 parse_format_data(evtr_event_t ev, const char *fmt, ...)
314 {
315         va_list ap;
316         char buf[2048];
317
318         if (strcmp(fmt, ev->fmt))
319                 return 0;
320         vsnprintf(buf, sizeof(buf), fmt, __DECONST(void *, ev->fmtdata));
321         printd("string is: %s\n", buf);
322         va_start(ap, fmt);
323         return vsscanf(buf, fmt, ap);
324 }
325
326 static
327 void
328 evtr_deregister_filters(evtr_t evtr, evtr_filter_t filt, int nfilt)
329 {
330         struct event_filter_unresolved *u, *tmp;
331         int i;
332         TAILQ_FOREACH_MUTABLE(u, &evtr->unresolved_filtq, link, tmp) {
333                 for (i = 0; i < nfilt; ++i) {
334                         if (u->filt == &filt[i]) {
335                                 TAILQ_REMOVE(&evtr->unresolved_filtq, u, link);
336                         }
337                 }
338         }
339 }
340
341 static
342 void
343 evtr_resolve_filters(evtr_t evtr, const char *fmt, int id)
344 {
345         struct event_filter_unresolved *u, *tmp;
346         TAILQ_FOREACH_MUTABLE(u, &evtr->unresolved_filtq, link, tmp) {
347                 if ((u->filt->fmt != NULL) && !strcmp(fmt, u->filt->fmt)) {
348                         u->filt->fmtid = id;
349                         u->filt->flags |= FILTF_ID;
350                         TAILQ_REMOVE(&evtr->unresolved_filtq, u, link);
351                 }
352         }
353 }
354
355 static
356 int
357 evtr_filter_register(evtr_t evtr, evtr_filter_t filt)
358 {
359         struct event_filter_unresolved *res;
360
361         if (!(res = malloc(sizeof(*res)))) {
362                 evtr->err = ENOMEM;
363                 return !0;
364         }
365         res->filt = filt;
366         TAILQ_INSERT_TAIL(&evtr->unresolved_filtq, res, link);
367         return 0;
368 }
369
370 void
371 evtr_event_data(evtr_event_t ev, char *buf, size_t len)
372 {
373         /*
374          * XXX: we implicitly trust the format string.
375          * We shouldn't.
376          */
377         if (ev->fmtdatalen) {
378                 vsnprintf(buf, len, ev->fmt, __DECONST(void *, ev->fmtdata));
379         } else {
380                 strlcpy(buf, ev->fmt, len);
381         }
382 }
383
384
385 int
386 evtr_error(evtr_t evtr)
387 {
388         return evtr->err || (evtr->errmsg != NULL);
389 }
390
391 const char *
392 evtr_errmsg(evtr_t evtr)
393 {
394         return evtr->errmsg ? evtr->errmsg : strerror(evtr->err);
395 }
396
397 static
398 int
399 id_map_cmp(struct id_map *a, struct id_map *b)
400 {
401         return a->id - b->id;
402 }
403
404 static
405 int
406 thread_cmp(struct evtr_thread *a, struct evtr_thread *b)
407 {
408         return (int)a->id - (int)b->id;
409 }
410
/*
 * Define <prefix>_map_find(tree, id): look @id up in an id_tree and
 * return the data pointer stored for it, typed as @type, or NULL if
 * the id is not in the tree.
 */
#define DEFINE_MAP_FIND(prefix, type)           \
        static                                  \
        type                            \
        prefix ## _map_find(struct id_tree *tree, int id)\
        {                                                \
                struct id_map *sid;                      \
                                                        \
                sid = id_tree_RB_LOOKUP(tree, id);      \
                return sid ? sid->data : NULL;          \
        }

/* string_map_find(): id -> string */
DEFINE_MAP_FIND(string, const char *)
/* fmt_map_find(): id -> struct event_fmt */
DEFINE_MAP_FIND(fmt, const struct event_fmt *)
424
425 static
426 struct evtr_thread *
427 thread_map_find(struct thread_map *map, void *id)
428 {
429         return thread_tree_RB_LOOKUP(&map->root, id);
430 }
431
/*
 * Define <prefix>_map_insert(tree, data, id): associate @id with a
 * private copy of @data (made with _dup).
 *
 * Returns 0 on success or if @id is already mapped to equal data
 * (as judged by _cmp), EEXIST if @id is already mapped to different
 * data, and ENOMEM if memory runs out. The tree is left unchanged on
 * every failure.
 */
#define DEFINE_MAP_INSERT(prefix, type, _cmp, _dup)     \
        static                                  \
        int                                                             \
        prefix ## _map_insert(struct id_tree *tree, type data, int id) \
        {                                                               \
        struct id_map *sid, *osid;                                      \
                                                                        \
        sid = malloc(sizeof(*sid));                                     \
        if (!sid) {                                                     \
                return ENOMEM;                                          \
        }                                                               \
        sid->id = id;                                                   \
        sid->data = data;                                               \
        if ((osid = id_tree_RB_INSERT(tree, sid))) {                    \
                free(sid);                                              \
                if (_cmp((type)osid->data, data)) {                     \
                        return EEXIST;                                  \
                }                                                       \
                printd("mapping already exists, skipping\n");           \
                /* we're OK with redefinitions of an id to the same string */ \
                return 0;                                               \
        }                                                               \
        /* only do the strdup if we're inserting a new string */        \
        sid->data = _dup(data);                                         \
        if (!sid->data) {                                               \
                /* don't leave a node without data in the tree */       \
                RB_REMOVE(id_tree, tree, sid);                          \
                free(sid);                                              \
                return ENOMEM;                                          \
        }                                                               \
        return 0;                                                       \
}
458
459 static
460 void
461 thread_map_insert(struct thread_map *map, struct evtr_thread *td)
462 {
463         struct evtr_thread *otd;
464
465         if ((otd = thread_tree_RB_INSERT(&map->root, td))) {
466                 /*
467                  * Thread addresses might be reused, we're
468                  * ok with that.
469                  * DANGER, Will Robinson: this means the user
470                  * of the API needs to copy event->td if they
471                  * want it to remain stable.
472                  */
473                 free((void *)otd->comm);
474                 otd->comm = td->comm;
475                 free(td);
476         }
477 }
478
479 static
480 int
481 event_fmt_cmp(const struct event_fmt *a, const struct event_fmt *b)
482 {
483         int ret = 0;
484
485         if (a->subsys) {
486                 if (b->subsys) {
487                         ret = strcmp(a->subsys, b->subsys);
488                 } else {
489                         ret = strcmp(a->subsys, "");
490                 }
491         } else if (b->subsys) {
492                         ret = strcmp("", b->subsys);
493         }
494         if (ret)
495                 return ret;
496         return strcmp(a->fmt, b->fmt);
497 }
498
499 static
500 struct event_fmt *
501 event_fmt_dup(const struct event_fmt *o)
502 {
503         struct event_fmt *n;
504
505         if (!(n = malloc(sizeof(*n)))) {
506                 return n;
507         }
508         memcpy(n, o, sizeof(*n));
509         return n;
510 }
511
/* string_map_insert(): strings are compared with strcmp() and copied with strdup() */
DEFINE_MAP_INSERT(string, const char *, strcmp, strdup)
/* fmt_map_insert(): pairs are compared with event_fmt_cmp() and copied with event_fmt_dup() */
DEFINE_MAP_INSERT(fmt, const struct event_fmt *, event_fmt_cmp, event_fmt_dup)
514
515 static
516 int
517 hashfunc(const char *str)
518 {
519         unsigned long hash = 5381;
520         int c;
521
522         while ((c = *str++))
523             hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
524         return hash  % NR_BUCKETS;
525 }
526
527 static
528 struct hashentry *
529 hash_find(struct hashtab *tab, const char *str)
530 {
531         struct hashentry *ent;
532
533         for(ent = tab->buckets[hashfunc(str)]; ent && strcmp(ent->str, str);
534             ent = ent->next);
535
536         return ent;
537 }
538
539 static
540 struct hashentry *
541 hash_insert(struct hashtab *tab, const char *str)
542 {
543         struct hashentry *ent;
544         int hsh;
545
546         if (!(ent = malloc(sizeof(*ent)))) {
547                 fprintf(stderr, "out of memory\n");
548                 return NULL;
549         }
550         hsh = hashfunc(str);
551         ent->next = tab->buckets[hsh];
552         ent->str = strdup(str);
553         ent->id = ++tab->id;
554         if (tab->id == 0) {
555                 fprintf(stderr, "too many strings\n");
556                 free(ent);
557                 return NULL;
558         }
559         tab->buckets[hsh] = ent;
560         return ent;
561 }
562
563 static
564 void
565 thread_creation_callback(evtr_event_t ev, void *d)
566 {
567         evtr_t evtr = (evtr_t)d;
568         struct evtr_thread *td;
569         void *ktd;
570         char buf[20];
571
572         //printd("thread_creation_callback\n");
573         if (parse_format_data(ev, "new_td %p %s", &ktd, buf) != 2) {
574                 return;
575         }
576         buf[19] = '\0';
577
578         if (!(td = malloc(sizeof(*td)))) {
579                 evtr->err = ENOMEM;
580                 return;
581         }
582         td->id = ktd;
583         td->userdata = NULL;
584         if (!(td->comm = strdup(buf))) {
585                 free(td);
586                 evtr->err = ENOMEM;
587                 return;
588         }
589         printd("inserting new thread %p: %s\n", td->id, td->comm);
590         thread_map_insert(&evtr->threads, td);
591 }
592
593 static
594 void
595 thread_switch_callback(evtr_event_t ev, void *d)
596 {
597         evtr_t evtr = (evtr_t)d;
598         struct evtr_thread *tdp, *tdn;
599         void *ktdp, *ktdn;
600         struct cpu *cpu;
601         static struct evtr_event tdcr;
602         static char *fmt = "new_td %p %s";
603         char tidstr[40];
604         char fmtdata[sizeof(void *) + sizeof(char *)];
605
606         //printd("thread_switch_callback\n");
607         cpu = evtr_cpu(evtr, ev->cpu);
608         if (!cpu) {
609                 printd("invalid cpu %d\n", ev->cpu);
610                 return;
611         }
612         if (parse_format_data(ev, "sw  %p > %p", &ktdp, &ktdn) != 2) {
613                 return;
614         }
615         tdp = thread_map_find(&evtr->threads, ktdp);
616         if (!tdp) {
617                 printd("switching from unknown thread %p\n", ktdp);
618         }
619         tdn = thread_map_find(&evtr->threads, ktdn);
620         if (!tdn) {
621                 /*
622                  * Fake a thread creation event for threads we
623                  * haven't seen before.
624                  */
625                 tdcr.type = EVTR_TYPE_PROBE;
626                 tdcr.ts = ev->ts;
627                 tdcr.file = NULL;
628                 tdcr.func = NULL;
629                 tdcr.line = 0;
630                 tdcr.fmt = fmt;
631                 tdcr.fmtdata = &fmtdata;
632                 tdcr.fmtdatalen = sizeof(fmtdata);
633                 tdcr.cpu = ev->cpu;
634                 tdcr.td = NULL;
635                 snprintf(tidstr, sizeof(tidstr), "%p", ktdn);
636                 ((void **)fmtdata)[0] = ktdn;
637                 ((char **)fmtdata)[1] = &tidstr[0];
638                 thread_creation_callback(&tdcr, evtr);
639
640                 tdn = thread_map_find(&evtr->threads, ktdn);
641                 assert(tdn != NULL);
642                 printd("switching to unknown thread %p\n", ktdn);
643                 cpu->td = tdn;
644                 return;
645         }
646         printd("cpu %d: switching to thread %p\n", ev->cpu, ktdn);
647         cpu->td = tdn;
648 }
649
650 static
651 void
652 assert_foff_in_sync(evtr_t evtr)
653 {
654         off_t off;
655
656         /*
657          * We keep our own offset because we
658          * might want to support mmap()
659          */
660         off = ftello(evtr->f);
661         if (evtr->bytes != off) {
662                 fprintf(stderr, "bytes %jd, off %jd\n", evtr->bytes, off);
663                 abort();
664         }
665 }
666
667 static
668 int
669 evtr_write(evtr_t evtr, const void *buf, size_t bytes)
670 {
671         assert_foff_in_sync(evtr);
672         if (fwrite(buf, bytes, 1, evtr->f) != 1) {
673                 evtr->err = errno;
674                 evtr->errmsg = strerror(errno);
675                 return !0;
676         }
677         evtr->bytes += bytes;
678         assert_foff_in_sync(evtr);
679         return 0;
680 }
681
682 /*
683  * Called after dumping a record to make sure the next
684  * record is REC_ALIGN aligned. This does not make much sense,
685  * as we shouldn't be using packed structs anyway.
686  */
687 static
688 int
689 evtr_dump_pad(evtr_t evtr)
690 {
691         size_t pad;
692         static char buf[REC_ALIGN];
693
694         pad = REC_ALIGN - (evtr->bytes % REC_ALIGN);
695         if (pad > 0) {
696                 return evtr_write(evtr, buf, pad);
697         }
698         return 0;
699 }
700
701 /*
702  * We make sure that there is a new record every REC_BOUNDARY
703  * bytes, this costs next to nothing in space and allows for
704  * fast seeking.
705  */
706 static
707 int
708 evtr_dump_avoid_boundary(evtr_t evtr, size_t bytes)
709 {
710         unsigned pad, i;
711         static char buf[256];
712
713         pad = REC_BOUNDARY - (evtr->bytes % REC_BOUNDARY);
714         /* if adding @bytes would cause us to cross a boundary... */
715         if (bytes > pad) {
716                 /* then pad to the boundary */
717                 for (i = 0; i < (pad / sizeof(buf)); ++i) {
718                         if (evtr_write(evtr, buf, sizeof(buf))) {
719                                 return !0;
720                         }
721                 }
722                 i = pad % sizeof(buf);
723                 if (i) {
724                         if (evtr_write(evtr, buf, i)) {
725                                 return !0;
726                         }
727                 }
728         }
729         return 0;
730 }
731
732 static
733 int
734 evtr_dump_fmt(evtr_t evtr, uint64_t ts, const evtr_event_t ev)
735 {
736         struct fmt_event_header fmt;
737         struct hashentry *ent;
738         char *subsys = "", buf[1024];
739
740         if (strlcpy(buf, subsys, sizeof(buf)) >= sizeof(buf)) {
741                 evtr->errmsg = "name of subsystem is too large";
742                 evtr->err = ERANGE;
743                 return 0;
744         }
745         if (strlcat(buf, ev->fmt, sizeof(buf)) >= sizeof(buf)) {
746                 evtr->errmsg = "fmt + name of subsystem is too large";
747                 evtr->err = ERANGE;
748                 return 0;
749         }
750
751         if ((ent = hash_find(evtr->fmts, buf))) {
752                 return ent->id;
753         }
754         if (!(ent = hash_insert(evtr->fmts, buf))) {
755                 evtr->err = evtr->fmts->id ? ENOMEM : ERANGE;
756                 return 0;
757         }
758
759         fmt.eh.type = EVTR_TYPE_FMT;
760         fmt.eh.ts = ts;
761         fmt.subsys_len = strlen(subsys);
762         fmt.fmt_len = strlen(ev->fmt);
763         fmt.id = ent->id;
764         if (evtr_dump_avoid_boundary(evtr, sizeof(fmt) + fmt.subsys_len +
765                                      fmt.fmt_len))
766                 return 0;
767         if (evtr_write(evtr, &fmt, sizeof(fmt)))
768                 return 0;
769         if (evtr_write(evtr, subsys, fmt.subsys_len))
770                 return 0;
771         if (evtr_write(evtr, ev->fmt, fmt.fmt_len))
772                 return 0;
773         if (evtr_dump_pad(evtr))
774                 return 0;
775         return fmt.id;
776 }
777
/*
 * Replace string pointers or string ids in fmtdata
 *
 * Walks the printf-style format @fmt and, in parallel, the packed
 * argument area @fmtdata. For every "%s" conversion the pointer-sized
 * slot in @fmtdata is replaced with replace(@ctx, <old slot value>);
 * the slots of all other conversions are skipped. "%%" consumes no
 * argument.
 *
 * Returns the number of "%s" slots replaced, or -1 (after printing a
 * message to stderr) on a conversion specifier it does not know.
 */
static
int
mangle_string_ptrs(const char *fmt, uint8_t *fmtdata,
                   const char *(*replace)(void *, const char *), void *ctx)
{
        const char *f, *p;
        size_t skipsize, intsz;
        int ret = 0;

        for (f = fmt; f[0] != '\0'; ++f) {
                if (f[0] != '%')
                        continue;
                ++f;
                if (f[0] == '%') {
                        /* literal percent sign, no argument to skip */
                        continue;
                }
                skipsize = 0;
                /*
                 * Eat flags. Notice this will accept duplicate
                 * flags.
                 */
                for (p = f; p[0] != '\0' && strchr("#0- +'", p[0]); ++p)
                        ;
                /* Eat minimum field width, if any */
                for (; isdigit((unsigned char)p[0]); ++p)
                        ;
                if (p[0] == '.')
                        ++p;
                /* Eat precision, if any */
                for (; isdigit((unsigned char)p[0]); ++p)
                        ;
                /* Length modifier: determines the size of integer slots */
                intsz = 0;
                switch (p[0]) {
                case 'l':
                        if (p[1] == 'l') {
                                ++p;
                                intsz = sizeof(long long);
                        } else {
                                intsz = sizeof(long);
                        }
                        break;
                case 'j':
                        intsz = sizeof(intmax_t);
                        break;
                case 't':
                        intsz = sizeof(ptrdiff_t);
                        break;
                case 'z':
                        intsz = sizeof(size_t);
                        break;
                default:
                        break;
                }
                if (intsz != 0)
                        ++p;
                else
                        intsz = sizeof(int);

                switch (p[0]) {
                case 'd':
                case 'i':
                case 'o':
                case 'u':
                case 'x':
                case 'X':
                case 'c':
                        skipsize = intsz;
                        break;
                case 'p':
                        skipsize = sizeof(void *);
                        break;
                case 'f':
                        if (p[-1] == 'l')
                                skipsize = sizeof(double);
                        else
                                skipsize = sizeof(float);
                        break;
                case 's':
                        ((const char **)fmtdata)[0] =
                                replace(ctx, ((char **)fmtdata)[0]);
                        skipsize = sizeof(char *);
                        ++ret;
                        break;
                default:
                        fprintf(stderr, "Unknown conversion specifier %c "
                                "in fmt starting with %s", p[0], f - 1);
                        return -1;
                }
                fmtdata += skipsize;
        }
        return ret;
}
883
884 /* XXX: do we really want the timestamp? */
885 static
886 int
887 evtr_dump_string(evtr_t evtr, uint64_t ts, const char *str, int ns)
888 {
889         struct string_event_header s;
890         struct hashentry *ent;
891
892         assert((0 <= ns) && (ns < EVTR_NS_MAX));
893         if ((ent = hash_find(evtr->strings[ns], str))) {
894                 return ent->id;
895         }
896         if (!(ent = hash_insert(evtr->strings[ns], str))) {
897                 evtr->err = evtr->strings[ns]->id ? ENOMEM : ERANGE;
898                 return 0;
899         }
900
901         printd("hash_insert %s ns %d id %d\n", str, ns, ent->id);
902         s.eh.type = EVTR_TYPE_STR;
903         s.eh.ts = ts;
904         s.ns = ns;
905         s.id = ent->id;
906         s.len = strnlen(str, PATH_MAX);
907
908         if (evtr_dump_avoid_boundary(evtr, sizeof(s) + s.len))
909                 return 0;
910         if (evtr_write(evtr, &s, sizeof(s)))
911                 return 0;
912         if (evtr_write(evtr, str, s.len))
913                 return 0;
914         if (evtr_dump_pad(evtr))
915                 return 0;
916         return s.id;
917 }
918
/* Context handed to the replace callbacks of mangle_string_ptrs(). */
struct replace_ctx {
        evtr_t evtr;    /* stream the strings belong to */
        uint64_t ts;    /* timestamp used for string records dumped on the way */
};
923
924 static
925 const char *
926 replace_strptr(void *_ctx, const char *s)
927 {
928         struct replace_ctx *ctx = _ctx;
929         return (const char *)evtr_dump_string(ctx->evtr, ctx->ts, s, EVTR_NS_DSTR);
930 }
931
932 static
933 const char *
934 replace_strid(void *_ctx, const char *s)
935 {
936         struct replace_ctx *ctx = _ctx;
937         const char *ret;
938
939         ret = string_map_find(&ctx->evtr->maps[EVTR_NS_DSTR - 1].root,
940                               (uint32_t)s);
941         if (!ret) {
942                 fprintf(stderr, "Unknown id for data string\n");
943                 ctx->evtr->errmsg = "unknown id for data string";
944                 ctx->evtr->err = !0;
945         }
946         validate_string(ret);
947         printd("replacing strid %d (ns %d) with string '%s' (or int %#x)\n", (int)s,
948                EVTR_NS_DSTR, ret ? ret : "NULL", (int)ret);
949         return ret;
950 }
951
952 static
953 int
954 evtr_dump_probe(evtr_t evtr, evtr_event_t ev)
955 {
956         struct probe_event_header kev;
957         char buf[1024];
958
959         memset(&kev, '\0', sizeof(kev));
960         kev.eh.type = ev->type;
961         kev.eh.ts = ev->ts;
962         kev.line = ev->line;
963         kev.cpu = ev->cpu;
964         if (ev->file) {
965                 kev.file = evtr_dump_string(evtr, kev.eh.ts, ev->file,
966                                             EVTR_NS_PATH);
967         }
968         if (ev->func) {
969                 kev.func = evtr_dump_string(evtr, kev.eh.ts, ev->func,
970                                             EVTR_NS_FUNC);
971         }
972         if (ev->fmt) {
973                 kev.fmt = evtr_dump_fmt(evtr, kev.eh.ts, ev);
974         }
975         if (ev->fmtdata) {
976                 struct replace_ctx replctx = {
977                         .evtr = evtr,
978                         .ts = ev->ts,
979                 };
980                 assert(ev->fmtdatalen <= sizeof(buf));
981                 kev.datalen = ev->fmtdatalen;
982                 /*
983                  * Replace all string pointers with string ids before dumping
984                  * the data.
985                  */
986                 memcpy(buf, ev->fmtdata, ev->fmtdatalen);
987                 if (mangle_string_ptrs(ev->fmt, buf,
988                                        replace_strptr, &replctx) < 0)
989                         return !0;
990                 if (evtr->err)
991                         return evtr->err;
992         }
993         if (evtr_dump_avoid_boundary(evtr, sizeof(kev) + ev->fmtdatalen))
994                 return !0;
995         if (evtr_write(evtr, &kev, sizeof(kev)))
996                 return !0;
997         if (evtr_write(evtr, buf, ev->fmtdatalen))
998                 return !0;
999         if (evtr_dump_pad(evtr))
1000                 return !0;
1001         return 0;
1002 }
1003
1004 static
1005 int
1006 evtr_dump_sysinfo(evtr_t evtr, evtr_event_t ev)
1007 {
1008         uint8_t type = EVTR_TYPE_SYSINFO;
1009         uint16_t ncpus = ev->ncpus;
1010
1011         if (ncpus <= 0) {
1012                 evtr->errmsg = "invalid number of cpus";
1013                 return !0;
1014         }
1015         if (evtr_dump_avoid_boundary(evtr, sizeof(type) + sizeof(ncpus)))
1016                 return !0;
1017         if (evtr_write(evtr, &type, sizeof(type))) {
1018                 return !0;
1019         }
1020         if (evtr_write(evtr, &ncpus, sizeof(ncpus))) {
1021                 return !0;
1022         }
1023         if (evtr_dump_pad(evtr))
1024                 return !0;
1025         return 0;
1026 }
1027 static
1028 int
1029 evtr_dump_cpuinfo(evtr_t evtr, evtr_event_t ev)
1030 {
1031         struct cpuinfo_event_header ci;
1032         uint8_t type;
1033
1034         if (evtr_dump_avoid_boundary(evtr, sizeof(type) + sizeof(ci)))
1035                 return !0;
1036         type = EVTR_TYPE_CPUINFO;
1037         if (evtr_write(evtr, &type, sizeof(type))) {
1038                 return !0;
1039         }
1040         ci.cpu = ev->cpu;
1041         ci.freq = ev->cpuinfo.freq;
1042         if (evtr_dump_avoid_boundary(evtr, sizeof(ci)))
1043                 return !0;
1044         if (evtr_write(evtr, &ci, sizeof(ci))) {
1045                 return !0;
1046         }
1047         if (evtr_dump_pad(evtr))
1048                 return !0;
1049         return 0;
1050 }
1051
1052 int
1053 evtr_rewind(evtr_t evtr)
1054 {
1055         assert((evtr->flags & EVTRF_WR) == 0);
1056         evtr->bytes = 0;
1057         if (fseek(evtr->f, 0, SEEK_SET)) {
1058                 evtr->err = errno;
1059                 return !0;
1060         }
1061         return 0;
1062 }
1063
1064 int
1065 evtr_dump_event(evtr_t evtr, evtr_event_t ev)
1066 {
1067         switch (ev->type) {
1068         case EVTR_TYPE_PROBE:
1069                 return evtr_dump_probe(evtr, ev);
1070         case EVTR_TYPE_SYSINFO:
1071                 return evtr_dump_sysinfo(evtr, ev);
1072         case EVTR_TYPE_CPUINFO:
1073                 return evtr_dump_cpuinfo(evtr, ev);
1074         }
1075         evtr->errmsg = "unknown event type";
1076         return !0;
1077 }
1078
1079 static
1080 evtr_t
1081 evtr_alloc(FILE *f)
1082 {
1083         evtr_t evtr;
1084         if (!(evtr = malloc(sizeof(*evtr)))) {
1085                 return NULL;
1086         }
1087
1088         evtr->f = f;
1089         evtr->err = 0;
1090         evtr->errmsg = NULL;
1091         evtr->bytes = 0;
1092         TAILQ_INIT(&evtr->unresolved_filtq);
1093         return evtr;
1094 }
1095
1096 evtr_t
1097 evtr_open_read(FILE *f)
1098 {
1099         evtr_t evtr;
1100         struct evtr_event ev;
1101         int i;
1102
1103         if (!(evtr = evtr_alloc(f))) {
1104                 return NULL;
1105         }
1106         evtr->flags = 0;
1107         for (i = 0; i < (EVTR_NS_MAX - 1); ++i) {
1108                 RB_INIT(&evtr->maps[i].root);
1109         }
1110         RB_INIT(&evtr->fmtmap.root);
1111         TAILQ_INIT(&evtr->unresolved_filtq);
1112         evtr->cbs = 0;
1113         evtr->ncbs = 0;
1114         RB_INIT(&evtr->threads.root);
1115         evtr->cpus = NULL;
1116         evtr->ncpus = 0;
1117         if (evtr_register_callback(evtr, &thread_creation_callback, evtr)) {
1118                 goto free_evtr;
1119         }
1120         if (evtr_register_callback(evtr, &thread_switch_callback, evtr)) {
1121                 goto free_cbs;
1122         }
1123         /*
1124          * Load the first event so we can pick up any
1125          * sysinfo entries.
1126          */
1127         if (evtr_next_event(evtr, &ev)) {
1128                 goto free_cbs;
1129         }
1130         if (evtr_rewind(evtr))
1131                 goto free_cbs;
1132         return evtr;
1133 free_cbs:
1134         evtr_deregister_callbacks(evtr);
1135 free_evtr:
1136         free(evtr);
1137         return NULL;
1138 }
1139
1140 evtr_t
1141 evtr_open_write(FILE *f)
1142 {
1143         evtr_t evtr;
1144         int i, j;
1145
1146         if (!(evtr = evtr_alloc(f))) {
1147                 return NULL;
1148         }
1149
1150         evtr->flags = EVTRF_WR;
1151         if (!(evtr->fmts = calloc(sizeof(struct hashtab), 1)))
1152                 goto free_evtr;
1153
1154         for (i = 0; i < EVTR_NS_MAX; ++i) {
1155                 evtr->strings[i] = calloc(sizeof(struct hashtab), 1);
1156                 if (!evtr->strings[i]) {
1157                         for (j = 0; j < i; ++j) {
1158                                 free(evtr->strings[j]);
1159                         }
1160                         goto free_fmts;
1161                 }
1162         }
1163
1164         return evtr;
1165 free_fmts:
1166         free(evtr->fmts);
1167 free_evtr:
1168         free(evtr);
1169         return NULL;
1170 }
1171
1172 static
1173 void
1174 hashtab_destroy(struct hashtab *h)
1175 {
1176         struct hashentry *ent, *next;
1177         int i;
1178         for (i = 0; i < NR_BUCKETS; ++i) {
1179                 for (ent = h->buckets[i]; ent; ent = next) {
1180                         next = ent->next;
1181                         free(ent);
1182                 }
1183         }
1184         free(h);
1185 }
1186
1187 void
1188 evtr_close(evtr_t evtr)
1189 {
1190         int i;
1191
1192         if (evtr->flags & EVTRF_WR) {
1193                 hashtab_destroy(evtr->fmts);
1194                 for (i = 0; i < EVTR_NS_MAX; ++i)
1195                         hashtab_destroy(evtr->strings[i]);
1196         } else {
1197                 id_tree_free(&evtr->fmtmap.root);
1198                 for (i = 0; i < EVTR_NS_MAX - 1; ++i) {
1199                         id_tree_free(&evtr->maps[i].root);
1200                 }
1201         }
1202         free(evtr);
1203 }
1204
1205 static
1206 int
1207 evtr_read(evtr_t evtr, void *buf, size_t size)
1208 {
1209         assert(size > 0);
1210         assert_foff_in_sync(evtr);
1211 //      printd("evtr_read at %#jx, %zd bytes\n", evtr->bytes, size);
1212         if (fread(buf, size, 1, evtr->f) != 1) {
1213                 if (feof(evtr->f)) {
1214                         evtr->errmsg = "incomplete record";
1215                 } else {
1216                         evtr->errmsg = strerror(errno);
1217                 }
1218                 return !0;
1219         }
1220         evtr->bytes += size;
1221         assert_foff_in_sync(evtr);
1222         return 0;
1223 }
1224
1225 static
1226 int
1227 evtr_load_fmt(evtr_t evtr, char *buf)
1228 {
1229         struct fmt_event_header *evh = (struct fmt_event_header *)buf;
1230         struct event_fmt *fmt;
1231         char *subsys = NULL, *fmtstr;
1232
1233         if (!(fmt = malloc(sizeof(*fmt)))) {
1234                 evtr->err = errno;
1235                 return !0;
1236         }
1237         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1238                       sizeof(*evh) - sizeof(evh->eh))) {
1239                 goto free_fmt;
1240         }
1241         assert(!evh->subsys_len);
1242         if (evh->subsys_len) {
1243                 if (!(subsys = malloc(evh->subsys_len))) {
1244                         evtr->err = errno;
1245                         goto free_fmt;
1246                 }
1247                 if (evtr_read(evtr, subsys, evh->subsys_len)) {
1248                         goto free_subsys;
1249                 }
1250                 fmt->subsys = subsys;
1251         } else {
1252                 fmt->subsys = "";
1253         }
1254         if (!(fmtstr = malloc(evh->fmt_len + 1))) {
1255                 evtr->err = errno;
1256                 goto free_subsys;
1257         }
1258         if (evtr_read(evtr, fmtstr, evh->fmt_len)) {
1259                 goto free_fmtstr;
1260         }
1261         fmtstr[evh->fmt_len] = '\0';
1262         fmt->fmt = fmtstr;
1263
1264         printd("fmt_map_insert (%d, %s)\n", evh->id, fmt->fmt);
1265         evtr->err = fmt_map_insert(&evtr->fmtmap.root, fmt, evh->id);
1266         switch (evtr->err) {
1267         case ENOMEM:
1268                 evtr->errmsg = "out of memory";
1269                 break;
1270         case EEXIST:
1271                 evtr->errmsg = "redefinition of an id to a "
1272                         "different format (corrupt input)";
1273                 break;
1274         default:
1275                 evtr_resolve_filters(evtr, fmt->fmt, evh->id);
1276         }
1277         return 0;
1278
1279 free_fmtstr:
1280         free(fmtstr);
1281 free_subsys:
1282         if (subsys)
1283                 free(subsys);
1284 free_fmt:
1285         free(fmt);
1286         return !0;
1287 }
1288
1289 static
1290 int
1291 evtr_load_string(evtr_t evtr, char *buf)
1292 {
1293         char sbuf[PATH_MAX + 1];
1294         struct string_event_header *evh = (struct string_event_header *)buf;
1295
1296         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1297                       sizeof(*evh) - sizeof(evh->eh))) {
1298                 return !0;
1299         }
1300         if (evh->len > PATH_MAX) {
1301                 evtr->errmsg = "string too large (corrupt input)";
1302                 return !0;
1303         }
1304         if (evh->len && evtr_read(evtr, sbuf, evh->len)) {
1305                 return !0;
1306         }
1307         sbuf[evh->len] = 0;
1308         if (evh->ns >= EVTR_NS_MAX) {
1309                 evtr->errmsg = "invalid namespace (corrupt input)";
1310                 return !0;
1311         }
1312         validate_string(sbuf);
1313         printd("evtr_load_string:ns %d id %d : \"%s\"\n", evh->ns, evh->id,
1314                sbuf);
1315         evtr->err = string_map_insert(&evtr->maps[evh->ns - 1].root, sbuf, evh->id);
1316         switch (evtr->err) {
1317         case ENOMEM:
1318                 evtr->errmsg = "out of memory";
1319                 break;
1320         case EEXIST:
1321                 evtr->errmsg = "redefinition of an id to a "
1322                         "different string (corrupt input)";
1323                 break;
1324         default:
1325                 ;
1326         }
1327         return 0;
1328 }
1329
1330 static
1331 int
1332 evtr_filter_match(evtr_filter_t f, struct probe_event_header *pev)
1333 {
1334         if ((f->cpu != -1) && (f->cpu != pev->cpu))
1335                 return 0;
1336         if (!f->fmtid)
1337                 return !0;
1338         /*
1339          * If we don't have an id for the required format
1340          * string, the format string won't match anyway
1341          * (we require that id <-> fmt mappings appear
1342          * before the first appearance of the fmt string),
1343          * so don't bother comparing.
1344          */
1345         if (!(f->flags & FILTF_ID))
1346                 return 0;
1347         if(pev->fmt == f->fmtid)
1348                 return !0;
1349         return 0;
1350 }
1351
1352 static
1353 int
1354 evtr_match_filters(struct evtr_query *q, struct probe_event_header *pev)
1355 {
1356         int i;
1357
1358         /* no filters means we're interested in all events */
1359         if (!q->nfilt)
1360                 return !0;
1361         ++q->ntried;
1362         for (i = 0; i < q->nfilt; ++i) {
1363                 if (evtr_filter_match(&q->filt[i], pev)) {
1364                         ++q->nmatched;
1365                         return !0;
1366                 }
1367         }
1368         return 0;
1369 }
1370
1371 static
1372 int
1373 evtr_skip(evtr_t evtr, off_t bytes)
1374 {
1375         if (fseek(evtr->f, bytes, SEEK_CUR)) {
1376                 evtr->err = errno;
1377                 evtr->errmsg = strerror(errno);
1378                 return !0;
1379         }
1380         evtr->bytes += bytes;
1381         return 0;
1382 }
1383
1384 /*
1385  * Make sure q->buf is at least len bytes
1386  */
1387 static
1388 int
1389 evtr_query_reserve_buf(struct evtr_query *q, int len)
1390 {
1391         void *tmp;
1392
1393         if (q->bufsize >= len)
1394                 return 0;
1395         if (!(tmp = realloc(q->buf, len)))
1396                 return !0;
1397         q->buf = tmp;
1398         q->bufsize = len;
1399         return 0;
1400 }
1401
1402 static
1403 int
1404 evtr_load_probe(evtr_t evtr, evtr_event_t ev, char *buf, struct evtr_query *q)
1405 {
1406         struct probe_event_header *evh = (struct probe_event_header *)buf;
1407         struct cpu *cpu;
1408
1409         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1410                       sizeof(*evh) - sizeof(evh->eh)))
1411                 return !0;
1412         memset(ev, '\0', sizeof(*ev));
1413         ev->ts = evh->eh.ts;
1414         ev->type = EVTR_TYPE_PROBE;
1415         ev->line = evh->line;
1416         ev->cpu = evh->cpu;
1417         if ((cpu = evtr_cpu(evtr, evh->cpu))) {
1418                 ev->td = cpu->td;
1419         } else {
1420                 ev->td = NULL;
1421         }
1422         if (evh->file) {
1423                 ev->file = string_map_find(
1424                         &evtr->maps[EVTR_NS_PATH - 1].root,
1425                         evh->file);
1426                 if (!ev->file) {
1427                         evtr->errmsg = "unknown id for file path";
1428                         evtr->err = !0;
1429                         ev->file = "<unknown>";
1430                 } else {
1431                         validate_string(ev->file);
1432                 }
1433         } else {
1434                 ev->file = "<unknown>";
1435         }
1436         if (evh->fmt) {
1437                 const struct event_fmt *fmt;
1438                 if (!(fmt = fmt_map_find(&evtr->fmtmap.root, evh->fmt))) {
1439                         evtr->errmsg = "unknown id for event fmt";
1440                         evtr->err = !0;
1441                         ev->fmt = NULL;
1442                 } else {
1443                         ev->fmt = fmt->fmt;
1444                         validate_string(fmt->fmt);
1445                 }
1446         }
1447         if (evh->datalen) {
1448                 if (evtr_query_reserve_buf(q, evh->datalen + 1)) {
1449                         evtr->err = ENOMEM;
1450                 } else if (!evtr_read(evtr, q->buf, evh->datalen)) {
1451                         struct replace_ctx replctx = {
1452                                 .evtr = evtr,
1453                                 .ts = ev->ts,
1454                         };
1455                         assert(ev->fmt);
1456
1457                         ev->fmtdata = q->buf;
1458                         /*
1459                          * If the format specifies any string pointers, there
1460                          * is a string id stored in the fmtdata. Look it up
1461                          * and replace it with a string pointer before
1462                          * returning it to the user.
1463                          */
1464                         if (mangle_string_ptrs(ev->fmt, __DECONST(uint8_t *,
1465                                                                   ev->fmtdata),
1466                                                replace_strid, &replctx) < 0)
1467                                 return evtr->err;
1468                         if (evtr->err)
1469                                 return evtr->err;
1470                         ((char *)ev->fmtdata)[evh->datalen] = '\0';
1471                         ev->fmtdatalen = evh->datalen;
1472                 }
1473         }
1474         evtr_run_callbacks(ev, evtr);
1475         /* we can't filter before running the callbacks */ 
1476         if (!evtr_match_filters(q, evh)) {
1477                 return -1;      /* no match */
1478         }
1479
1480         return evtr->err;
1481 }
1482
1483 static
1484 int
1485 evtr_skip_to_record(evtr_t evtr)
1486 {
1487         int skip;
1488         
1489         skip = REC_ALIGN - (evtr->bytes % REC_ALIGN);
1490         if (skip > 0) {
1491                 if (fseek(evtr->f, skip, SEEK_CUR)) {
1492                         evtr->err = errno;
1493                         evtr->errmsg = strerror(errno);
1494                         return !0;
1495                 }
1496                 evtr->bytes += skip;
1497         }
1498         return 0;
1499 }
1500
1501 static
1502 int
1503 evtr_load_sysinfo(evtr_t evtr)
1504 {
1505         uint16_t ncpus;
1506         int i;
1507
1508         if (evtr_read(evtr, &ncpus, sizeof(ncpus))) {
1509                 return !0;
1510         }
1511         if (evtr->cpus)
1512                 return 0;
1513         evtr->cpus = malloc(ncpus * sizeof(struct cpu));
1514         if (!evtr->cpus) {
1515                 evtr->err = ENOMEM;
1516                 return !0;
1517         }
1518         evtr->ncpus = ncpus;
1519         for (i = 0; i < ncpus; ++i) {
1520                 evtr->cpus[i].td = NULL;
1521                 evtr->cpus[i].freq = -1.0;
1522         }
1523         return 0;
1524 }
1525
1526 static
1527 int
1528 evtr_load_cpuinfo(evtr_t evtr)
1529 {
1530         struct cpuinfo_event_header cih;
1531         struct cpu *cpu;
1532
1533         if (evtr_read(evtr, &cih, sizeof(cih))) {
1534                 return !0;
1535         }
1536         if (cih.freq < 0.0) {
1537                 evtr->errmsg = "cpu freq is negative";
1538                 evtr->err = EINVAL;
1539                 return !0;
1540         }
1541         /*
1542          * Notice that freq is merely a multiplier with
1543          * which we convert a timestamp to seconds; if
1544          * ts is not in cycles, freq is not the frequency.
1545          */
1546         if (!(cpu = evtr_cpu(evtr, cih.cpu))) {
1547                 evtr->errmsg = "freq for invalid cpu";
1548                 evtr->err = EINVAL;
1549                 return !0;
1550         }
1551         cpu->freq = cih.freq;
1552         return 0;
1553 }
1554
1555 static
1556 int
1557 _evtr_next_event(evtr_t evtr, evtr_event_t ev, struct evtr_query *q)
1558 {
1559         char buf[MAX_EVHDR_SIZE];
1560         int ret, err, ntried, nmatched;
1561         struct trace_event_header *evhdr = (struct trace_event_header *)buf;
1562
1563         for (ret = 0; !ret;) {
1564                 if (evtr_read(evtr, &evhdr->type, 1)) {
1565                         if (feof(evtr->f)) {
1566                                 evtr->errmsg = NULL;
1567                                 evtr->err = 0;
1568                                 return -1;
1569                         }
1570                         return !0;
1571                 }
1572                 /*
1573                  * skip pad records -- this will only happen if there's a
1574                  * variable sized record close to the boundary
1575                  */
1576                 if (evhdr->type == EVTR_TYPE_PAD) {
1577                         evtr_skip_to_record(evtr);
1578                         continue;
1579                 }
1580                 if (evhdr->type == EVTR_TYPE_SYSINFO) {
1581                         evtr_load_sysinfo(evtr);
1582                         continue;
1583                 } else if (evhdr->type == EVTR_TYPE_CPUINFO) {
1584                         evtr_load_cpuinfo(evtr);
1585                         continue;
1586                 }
1587                 if (evtr_read(evtr, buf + 1, sizeof(*evhdr) - 1))
1588                         return feof(evtr->f) ? -1 : !0;
1589                 switch (evhdr->type) {
1590                 case EVTR_TYPE_PROBE:
1591                         ntried = q->ntried;
1592                         nmatched = q->nmatched;
1593                         if ((err = evtr_load_probe(evtr, ev, buf, q))) {
1594                                 if (err == -1) {
1595                                         /* no match */
1596                                         ret = 0;
1597                                 } else {
1598                                         return !0;
1599                                 }
1600                         } else {
1601                                 ret = !0;
1602                         }
1603                         break;
1604                 case EVTR_TYPE_STR:
1605                         if (evtr_load_string(evtr, buf)) {
1606                                 return !0;
1607                         }
1608                         break;
1609                 case EVTR_TYPE_FMT:
1610                         if (evtr_load_fmt(evtr, buf)) {
1611                                 return !0;
1612                         }
1613                         break;
1614                 default:
1615                         evtr->err = !0;
1616                         evtr->errmsg = "unknown event type (corrupt input?)";
1617                         return !0;
1618                 }
1619                 evtr_skip_to_record(evtr);
1620                 if (ret) {
1621                         q->off = evtr->bytes;
1622                         return 0;
1623                 }
1624         }
1625         /* can't get here */
1626         return !0;
1627 }
1628
1629 int
1630 evtr_next_event(evtr_t evtr, evtr_event_t ev)
1631 {
1632         struct evtr_query *q;
1633         int ret;
1634
1635         if (!(q = evtr_query_init(evtr, NULL, 0))) {
1636                 evtr->err = ENOMEM;
1637                 return !0;
1638         }
1639         ret = _evtr_next_event(evtr, ev, q);
1640         evtr_query_destroy(q);
1641         return ret;
1642 }
1643
1644 int
1645 evtr_last_event(evtr_t evtr, evtr_event_t ev)
1646 {
1647         struct stat st;
1648         int fd;
1649         off_t last_boundary;
1650
1651         fd = fileno(evtr->f);
1652         if (fstat(fd, &st))
1653                 return !0;
1654         /*
1655          * This skips pseudo records, so we can't provide
1656          * an event with all fields filled in this way.
1657          * It's doable, just needs some care. TBD.
1658          */
1659         if (0 && (st.st_mode & S_IFREG)) {
1660                 /*
1661                  * Skip to last boundary, that's the closest to the EOF
1662                  * location that we are sure contains a header so we can
1663                  * pick up the stream.
1664                  */
1665                 last_boundary = (st.st_size / REC_BOUNDARY) * REC_BOUNDARY;
1666                 /* XXX: ->bytes should be in query */
1667                 assert(evtr->bytes == 0);
1668                 evtr_skip(evtr, last_boundary);
1669         }
1670
1671
1672         /*
1673          * If we can't seek, we need to go through the whole file.
1674          * Since you can't seek back, this is pretty useless unless
1675          * you really are interested only in the last event.
1676          */
1677         while (!evtr_next_event(evtr, ev))
1678                 ;
1679         if (evtr_error(evtr))
1680                 return !0;
1681         evtr_rewind(evtr);
1682         return 0;
1683 }
1684
1685 struct evtr_query *
1686 evtr_query_init(evtr_t evtr, evtr_filter_t filt, int nfilt)
1687 {
1688         struct evtr_query *q;
1689         int i;
1690
1691         if (!(q = malloc(sizeof(*q)))) {
1692                 return q;
1693         }
1694         q->bufsize = 2;
1695         if (!(q->buf = malloc(q->bufsize))) {
1696                 goto free_q;
1697         }
1698         q->evtr = evtr;
1699         q->off = 0;
1700         q->filt = filt;
1701         q->nfilt = nfilt;
1702         q->nmatched = 0;
1703         for (i = 0; i < nfilt; ++i) {
1704                 filt[i].flags = 0;
1705                 if (filt[i].fmt == NULL)
1706                         continue;
1707                 if (evtr_filter_register(evtr, &filt[i])) {
1708                         evtr_deregister_filters(evtr, filt, i);
1709                         goto free_buf;
1710                 }
1711         }
1712
1713         return q;
1714 free_buf:
1715         free(q->buf);
1716 free_q:
1717         free(q);
1718         return NULL;
1719 }
1720
1721 void
1722 evtr_query_destroy(struct evtr_query *q)
1723 {
1724         evtr_deregister_filters(q->evtr, q->filt, q->nfilt);
1725         free(q->buf);
1726         free(q);
1727 }
1728
1729 int
1730 evtr_query_next(struct evtr_query *q, evtr_event_t ev)
1731 {
1732         /* we may support that in the future */
1733         if (q->off != q->evtr->bytes)
1734                 return !0;
1735         return _evtr_next_event(q->evtr, ev, q);
1736 }
1737
1738 int
1739 evtr_ncpus(evtr_t evtr)
1740 {
1741         return evtr->ncpus;
1742 }
1743
1744 int
1745 evtr_cpufreqs(evtr_t evtr, double *freqs)
1746 {
1747         int i;
1748
1749         if (!freqs)
1750                 return EINVAL;
1751         for (i = 0; i < evtr->ncpus; ++i) {
1752                 freqs[i] = evtr->cpus[i].freq;
1753         }
1754         return 0;
1755 }