7bf61e7b3911af3fb25d94653cf257c3aa3ea2d7
[dragonfly.git] / lib / libevtr / evtr.c
1 /*
2  * Copyright (c) 2009, 2010 Aggelos Economopoulos.  All rights reserved.
3  * 
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in
12  *    the documentation and/or other materials provided with the
13  *    distribution.
14  * 3. Neither the name of The DragonFly Project nor the names of its
15  *    contributors may be used to endorse or promote products derived
16  *    from this software without specific, prior written permission.
17  * 
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
22  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #include <assert.h>
33 #include <ctype.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stdarg.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/queue.h>
42 #include <sys/stat.h>
43 #include <sys/tree.h>
44
45
46 #include "evtr.h"
47 #include "internal.h"
48
/* Bitmask of enabled debug output classes; updated by evtr_set_debug(). */
unsigned evtr_debug;
50
/*
 * Parse the debug flag string @str and OR the corresponding bits
 * into *@flags.
 *
 * Each lowercase letter 'a'..'z' selects bit (letter - 'a'); the
 * single character 'A' turns on every bit and stops parsing.
 * Any other character terminates the program with exit status 2.
 *
 * This is still suboptimal in that letters which do not correspond
 * to a known debug class are accepted silently.
 */
static
void
printd_set_flags(const char *str, unsigned int *flags)
{
        for (; *str; ++str) {
                if ('A' == *str) {
                        *flags = ~0u;
                        return;
                }
                /*
                 * Explicit range check instead of islower(): the latter
                 * is locale dependent and undefined for negative chars,
                 * and anything outside 'a'..'z' would make the shift
                 * below exceed the width of the mask.
                 */
                if (*str < 'a' || *str > 'z') {
                        /* errx, not err: errno carries no meaning here */
                        errx(2, "invalid debug flag %c", *str);
                }
                *flags |= 1u << (*str - 'a');
        }
}
69
70
/*
 * File-private constants: sizes, string namespace ids, record layout
 * parameters and flag bits.
 */
enum {
        MAX_EVHDR_SIZE = PATH_MAX + 200,
        /* string namespaces */
        EVTR_NS_PATH = 0x1,
        EVTR_NS_FUNC,
        EVTR_NS_DSTR,
        EVTR_NS_MAX,            /* one past the last namespace; sizes the per-namespace arrays in struct evtr */
        NR_BUCKETS = 1021,      /* prime */
        REC_ALIGN = 8,          /* padding granularity used by evtr_dump_pad() */
        REC_BOUNDARY = 1 << 14, /* records are kept from straddling multiples of this offset */
        FILTF_ID = 0x10,        /* filter flag; evtr_filter_match() asserts it is clear */
        EVTRF_WR = 0x1,         /* open for writing */
        EVTRQF_PENDING = 0x1,   /* query flag: ->pending_event is to be returned next */
};
85
/* 16-bit identifiers used in the probe event header (file, function, format). */
typedef uint16_t fileid_t;
typedef uint16_t funcid_t;
typedef uint16_t fmtid_t;
89
/*
 * Common prefix embedded as the first member ("eh") of the probe,
 * string and fmt event headers below. Packed, so no padding is
 * inserted between the fields.
 */
struct trace_event_header {
        uint8_t type;   /* record type, e.g. EVTR_TYPE_FMT */
        uint64_t ts;    /* XXX: this should only be part of probe */
} __attribute__((packed));
94
/*
 * Header of a probe event record. The id fields refer to
 * strings/formats by number rather than by value.
 */
struct probe_event_header {
        struct trace_event_header eh;
        /*
         * For these fields, 0 implies "not available"
         */
        fileid_t file;
        funcid_t caller1;
        funcid_t caller2;
        funcid_t func;
        uint16_t line;
        fmtid_t fmt;
        uint16_t datalen;       /* presumably the size of the format data following the header -- TODO confirm */
        uint8_t cpu;    /* -1 if n/a */
} __attribute__((packed));
109
/*
 * Header of a string definition record: binds @id to a string
 * within namespace @ns (one of the EVTR_NS_* values).
 */
struct string_event_header {
        struct trace_event_header eh;
        uint16_t ns;
        uint32_t id;
        uint16_t len;   /* presumably the length of the string data that follows -- TODO confirm */
} __attribute__((packed));
116
/*
 * Header of a format definition record. evtr_dump_fmt() writes this
 * header followed by subsys_len bytes of subsystem name and fmt_len
 * bytes of format string (neither NUL terminated), then padding.
 */
struct fmt_event_header {
        struct trace_event_header eh;
        uint16_t id;            /* id the format is subsequently referred to by */
        uint8_t subsys_len;     /* limits the subsystem name to 255 bytes */
        uint8_t fmt_len;        /* limits the format string to 255 bytes */
} __attribute__((packed));
123
/*
 * Per-cpu information: a frequency and the cpu number it applies to.
 * NOTE(review): unlike the other headers this one does not embed a
 * trace_event_header; presumably it follows one in the stream --
 * confirm against the reader.
 */
struct cpuinfo_event_header {
        double freq;
        uint8_t cpu;
} __attribute__((packed));
128
/*
 * One key/value pair in a hash table bucket. Entries that hash to
 * the same bucket are chained through ->next (see hash_insert()).
 */
struct hashentry {
        uintptr_t key;
        uintptr_t val;
        struct hashentry *next;
};
134
/*
 * Fixed-size chained hash table.
 *
 * hashfunc's result is used directly as an index into buckets[]
 * by hash_find()/hash_insert(), so it has to be < NR_BUCKETS.
 * cmpfunc returns 0 when two keys are equal (hash_find() keeps
 * walking the chain while it returns non-zero).
 */
struct hashtab {
        struct hashentry *buckets[NR_BUCKETS];
        uintptr_t (*hashfunc)(uintptr_t);
        uintptr_t (*cmpfunc)(uintptr_t, uintptr_t);
};
140
/*
 * Symbol table: a string-keyed hash table whose values are
 * struct evtr_variable pointers (see symtab_find()/symtab_insert()).
 */
struct symtab {
        struct hashtab tab;
};
144
/*
 * A (subsystem, format string) pair. ->subsys may be NULL, which
 * event_fmt_cmp() treats the same as the empty string.
 */
struct event_fmt {
        const char *subsys;
        const char *fmt;
};
149
/*
 * Entry on a query's unresolved_filtq: records a filter that still
 * needs its format resolved. Allocated by evtr_filter_register().
 */
struct event_filter_unresolved {
        TAILQ_ENTRY(event_filter_unresolved) link;
        evtr_filter_t filt;     /* points at the caller's filter, not a copy */
};
154
/*
 * Red-black tree node mapping an integer id to an opaque payload
 * (a string or a struct event_fmt, depending on the map).
 */
struct id_map {
        RB_ENTRY(id_map) rb_node;
        int id;                 /* tree key */
        const void *data;
};
160
/* Tree head type shared by the id -> string and id -> fmt maps. */
RB_HEAD(id_tree, id_map);
/* id -> string map; payloads are const char * */
struct string_map {
        struct id_tree root;
};

/* id -> format map; payloads are const struct event_fmt * */
struct fmt_map {
        struct id_tree root;
};
169
/* Tree of known threads, keyed on evtr_thread->id. */
RB_HEAD(thread_tree, evtr_thread);

struct thread_map {
        struct thread_tree root;
};
175
/*
 * A callback registered on a query together with the opaque
 * argument passed back to it (see evtr_run_callbacks()).
 */
struct event_callback {
        void (*cb)(evtr_event_t, void *data);
        void *data;     /* this field must be malloc()ed */
};
180
/* Per-cpu state tracked while reading a trace. */
struct cpu {
        struct evtr_thread *td; /* currently executing thread */
        double freq;
};
185
/*
 * State of one open event trace stream, used either for writing
 * (EVTRF_WR set in ->flags) or for reading.
 */
struct evtr {
        FILE *f;
        int flags;
        int err;                /* errno-style error code, 0 if none */
        const char *errmsg;     /* optional error text; takes precedence over ->err in evtr_errmsg() */
        off_t bytes;            /* our own idea of the file offset, checked against ftello() */
        union {
                /*
                 * When writing, we keep track of the strings we've
                 * already dumped so we only dump them once.
                 * Paths, function names etc belong to different
                 * namespaces.
                 */
                struct hashtab_str *strings[EVTR_NS_MAX - 1];
                /*
                 * When reading, we build a map from id to string.
                 * Every id must be defined at the point of use.
                 */
                struct string_map maps[EVTR_NS_MAX - 1];
        };
        union {
                /* same as above, but for subsys+fmt pairs */
                struct fmt_map fmtmap;
                struct hashtab_str *fmts;
        };
        struct thread_map threads;
        struct cpu *cpus;       /* array of ->ncpus entries, indexed by cpu number */
        int ncpus;
};
215
/*
 * State of one query (a filtered iteration) over an event trace.
 */
struct evtr_query {
        evtr_t evtr;
        off_t off;
        evtr_filter_t filt;     /* array of ->nfilt filters; none means match everything */
        int nfilt;
        int nmatched;           /* events that matched some filter */
        int ntried;             /* events that were run against the filters */
        void *buf;
        int bufsize;
        struct symtab *symtab;
        int ncbs;               /* number of entries in ->cbs */
        struct event_callback **cbs;
        /*
         * Filters that have a format specified and we
         * need to resolve that to an fmtid
         */
        TAILQ_HEAD(, event_filter_unresolved) unresolved_filtq;
        int err;
        const char *errmsg;
        int flags;              /* EVTRQF_* */
        struct evtr_event pending_event;        /* valid while EVTRQF_PENDING is set */
};
238
239 void
240 evtr_set_debug(const char *str)
241 {
242         printd_set_flags(str, &evtr_debug);
243 }
244
/*
 * Instantiate the red-black tree code for the id maps (keyed on the
 * int ->id field) ...
 */
static int id_map_cmp(struct id_map *, struct id_map *);
RB_PROTOTYPE2(id_tree, id_map, rb_node, id_map_cmp, int);
RB_GENERATE2(id_tree, id_map, rb_node, id_map_cmp, int, id);

/* ... and for the thread map (keyed on the void * ->id field). */
static int thread_cmp(struct evtr_thread *, struct evtr_thread *);
RB_PROTOTYPE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *);
RB_GENERATE2(thread_tree, evtr_thread, rb_node, thread_cmp, void *, id);
252
253 static inline
254 void
255 validate_string(const char *str)
256 {
257         if (!(evtr_debug & MISC))
258                 return;
259         for (; *str; ++str)
260                 assert(isprint(*str));
261 }
262
263 static
264 void
265 id_tree_free(struct id_tree *root)
266 {
267         struct id_map *v, *n;
268
269         for (v = RB_MIN(id_tree, root); v; v = n) {
270                 n = RB_NEXT(id_tree, root, v);
271                 RB_REMOVE(id_tree, root, v);
272         }
273 }
274
275 static
276 int
277 evtr_register_callback(evtr_query_t q, void (*fn)(evtr_event_t, void *), void *d)
278 {
279         struct event_callback *cb;
280         void *cbs;
281
282         if (!(cb = malloc(sizeof(*cb)))) {
283                 q->err = ENOMEM;
284                 return !0;
285         }
286         cb->cb = fn;
287         cb->data = d;
288         if (!(cbs = realloc(q->cbs, (++q->ncbs) * sizeof(cb)))) {
289                 --q->ncbs;
290                 free(cb);
291                 q->err = ENOMEM;
292                 return !0;
293         }
294         q->cbs = cbs;
295         q->cbs[q->ncbs - 1] = cb;
296         return 0;
297 }
298
299 static
300 void
301 evtr_deregister_callbacks(evtr_query_t q)
302 {
303         int i;
304
305         for (i = 0; i < q->ncbs; ++i) {
306                 free(q->cbs[i]);
307         }
308         free(q->cbs);
309         q->cbs = NULL;
310 }
311
312 static
313 void
314 evtr_run_callbacks(evtr_event_t ev, evtr_query_t q)
315 {
316         struct event_callback *cb;
317         int i;
318
319         for (i = 0; i < q->ncbs; ++i) {
320                 cb = q->cbs[i];
321                 cb->cb(ev, cb->data);
322         }
323 }
324
325 static
326 struct cpu *
327 evtr_cpu(evtr_t evtr, int c)
328 {
329         if ((c < 0) || (c >= evtr->ncpus))
330                 return NULL;
331         return &evtr->cpus[c];
332 }
333
334 static
335 int
336 parse_format_data(evtr_event_t ev, const char *fmt, ...) __attribute__((format (scanf, 2, 3)));
337 static
338 int
339 parse_format_data(evtr_event_t ev, const char *fmt, ...)
340 {
341         va_list ap;
342         char buf[2048];
343
344         if (strcmp(fmt, ev->fmt))
345                 return 0;
346         vsnprintf(buf, sizeof(buf), fmt, __DECONST(void *, ev->fmtdata));
347         printd(MISC, "string is: %s\n", buf);
348         va_start(ap, fmt);
349         return vsscanf(buf, fmt, ap);
350 }
351
352 static
353 void
354 evtr_deregister_filters(evtr_query_t q, evtr_filter_t filt, int nfilt)
355 {
356         struct event_filter_unresolved *u, *tmp;
357         int i;
358         TAILQ_FOREACH_MUTABLE(u, &q->unresolved_filtq, link, tmp) {
359                 for (i = 0; i < nfilt; ++i) {
360                         if (u->filt == &filt[i]) {
361                                 TAILQ_REMOVE(&q->unresolved_filtq, u, link);
362                         }
363                 }
364         }
365 }
366
367 static
368 int
369 evtr_filter_register(evtr_query_t q, evtr_filter_t filt)
370 {
371         struct event_filter_unresolved *res;
372
373         if (!(res = malloc(sizeof(*res)))) {
374                 q->err = ENOMEM;
375                 return !0;
376         }
377         res->filt = filt;
378         TAILQ_INSERT_TAIL(&q->unresolved_filtq, res, link);
379         return 0;
380 }
381
382 static
383 int
384 evtr_query_needs_parsing(evtr_query_t q)
385 {
386         int i;
387
388         for (i = 0; i < q->nfilt; ++i)
389                 if (q->filt[i].ev_type == EVTR_TYPE_STMT)
390                         return !0;
391         return 0;
392 }
393
394 void
395 evtr_event_data(evtr_event_t ev, char *buf, size_t len)
396 {
397         /*
398          * XXX: we implicitly trust the format string.
399          * We shouldn't.
400          */
401         if (ev->fmtdatalen) {
402                 vsnprintf(buf, len, ev->fmt, __DECONST(void *, ev->fmtdata));
403         } else {
404                 strlcpy(buf, ev->fmt, len);
405         }
406 }
407
408 int
409 evtr_error(evtr_t evtr)
410 {
411         return evtr->err || (evtr->errmsg != NULL);
412 }
413
414 const char *
415 evtr_errmsg(evtr_t evtr)
416 {
417         return evtr->errmsg ? evtr->errmsg : strerror(evtr->err);
418 }
419
420 int
421 evtr_query_error(evtr_query_t q)
422 {
423         return q->err || (q->errmsg != NULL) || evtr_error(q->evtr);
424 }
425
426 const char *
427 evtr_query_errmsg(evtr_query_t q)
428 {
429         return q->errmsg ? q->errmsg :
430                 (q->err ? strerror(q->err) :
431                  (evtr_errmsg(q->evtr)));
432 }
433
434 static
435 int
436 id_map_cmp(struct id_map *a, struct id_map *b)
437 {
438         return a->id - b->id;
439 }
440
441 static
442 int
443 thread_cmp(struct evtr_thread *a, struct evtr_thread *b)
444 {
445         ptrdiff_t d;
446         d =  a->id - b->id;
447         if (d < 0)
448                 return -1;
449         if (!d)
450                 return 0;
451         return 1;
452 }
453
/*
 * DEFINE_MAP_FIND(prefix, type) generates
 *
 *      static type prefix_map_find(struct id_tree *tree, int id)
 *
 * which looks up @id in @tree and returns the payload stored for
 * it, or NULL if the id is not present.
 */
#define DEFINE_MAP_FIND(prefix, type)           \
        static                                  \
        type                            \
        prefix ## _map_find(struct id_tree *tree, int id)\
        {                                                \
                struct id_map *sid;                      \
                                                        \
                sid = id_tree_RB_LOOKUP(tree, id);      \
                return sid ? sid->data : NULL;          \
        }

/* string_map_find() and fmt_map_find() */
DEFINE_MAP_FIND(string, const char *)
DEFINE_MAP_FIND(fmt, const struct event_fmt *)
467
468 static
469 struct evtr_thread *
470 thread_map_find(struct thread_map *map, void *id)
471 {
472         return thread_tree_RB_LOOKUP(&map->root, id);
473 }
474
/*
 * DEFINE_MAP_INSERT(prefix, type, _cmp, _dup) generates
 *
 *      static int prefix_map_insert(struct id_tree *tree, type data, int id)
 *
 * which binds @id to a private copy (made with _dup) of @data.
 *
 * Returns 0 on success, including the case where @id is already
 * bound to an equal payload (as judged by _cmp returning 0);
 * EEXIST if @id is bound to a different payload; ENOMEM if the node
 * or the copy cannot be allocated, in which case the tree is left
 * unchanged.
 */
#define DEFINE_MAP_INSERT(prefix, type, _cmp, _dup)     \
        static                                  \
        int                                                             \
        prefix ## _map_insert(struct id_tree *tree, type data, int id) \
        {                                                               \
        struct id_map *sid, *osid;                                      \
                                                                        \
        sid = malloc(sizeof(*sid));                                     \
        if (!sid) {                                                     \
                return ENOMEM;                                          \
        }                                                               \
        sid->id = id;                                                   \
        sid->data = data;                                               \
        if ((osid = id_tree_RB_INSERT(tree, sid))) {                    \
                free(sid);                                              \
                if (_cmp((type)osid->data, data)) {                     \
                        return EEXIST;                                  \
                }                                                       \
                printd(DS, "mapping already exists, skipping\n");               \
                /* we're OK with redefinitions of an id to the same string */ \
                return 0;                                               \
        }                                                               \
        /* only do the strdup if we're inserting a new string */        \
        sid->data = _dup(data);                                         \
        if (!sid->data) {                                               \
                /* don't leave a node without a payload in the tree */  \
                RB_REMOVE(id_tree, tree, sid);                          \
                free(sid);                                              \
                return ENOMEM;                                          \
        }                                                               \
        return 0;                                                       \
}
501
502 static
503 void
504 thread_map_insert(struct thread_map *map, struct evtr_thread *td)
505 {
506         struct evtr_thread *otd;
507
508         if ((otd = thread_tree_RB_INSERT(&map->root, td))) {
509                 /*
510                  * Thread addresses might be reused, we're
511                  * ok with that.
512                  * DANGER, Will Robinson: this means the user
513                  * of the API needs to copy event->td if they
514                  * want it to remain stable.
515                  */
516                 free((void *)otd->comm);
517                 otd->comm = td->comm;
518                 free(td);
519         }
520 }
521
522 static
523 int
524 event_fmt_cmp(const struct event_fmt *a, const struct event_fmt *b)
525 {
526         int ret = 0;
527
528         if (a->subsys) {
529                 if (b->subsys) {
530                         ret = strcmp(a->subsys, b->subsys);
531                 } else {
532                         ret = strcmp(a->subsys, "");
533                 }
534         } else if (b->subsys) {
535                         ret = strcmp("", b->subsys);
536         }
537         if (ret)
538                 return ret;
539         return strcmp(a->fmt, b->fmt);
540 }
541
542 static
543 struct event_fmt *
544 event_fmt_dup(const struct event_fmt *o)
545 {
546         struct event_fmt *n;
547
548         if (!(n = malloc(sizeof(*n)))) {
549                 return n;
550         }
551         memcpy(n, o, sizeof(*n));
552         return n;
553 }
554
/*
 * string_map_insert(): payloads compared with strcmp, copied with strdup.
 * fmt_map_insert(): payloads compared with event_fmt_cmp, copied with
 * event_fmt_dup.
 */
DEFINE_MAP_INSERT(string, const char *, strcmp, strdup)
DEFINE_MAP_INSERT(fmt, const struct event_fmt *, event_fmt_cmp, event_fmt_dup)
557
558 int
559 hash_find(const struct hashtab *tab, uintptr_t key, uintptr_t *val)
560 {
561         struct hashentry *ent;
562
563         for(ent = tab->buckets[tab->hashfunc(key)];
564             ent && tab->cmpfunc(ent->key, key);
565             ent = ent->next);
566
567         if (!ent)
568                 return !0;
569         *val = ent->val;
570         return 0;
571 }
572
573 struct hashentry *
574 hash_insert(struct hashtab *tab, uintptr_t key, uintptr_t val)
575 {
576         struct hashentry *ent;
577         int hsh;
578
579         if (!(ent = malloc(sizeof(*ent)))) {
580                 fprintf(stderr, "out of memory\n");
581                 return NULL;
582         }
583         hsh = tab->hashfunc(key);
584         ent->next = tab->buckets[hsh];
585         ent->key = key;
586         ent->val = val;
587         tab->buckets[hsh] = ent;
588         return ent;
589 }
590
/*
 * Key comparison for pointer-keyed tables: returns 0 iff the two
 * keys are identical (the non-zero value is b - a, modulo the width
 * of uintptr_t).
 */
static
uintptr_t
cmpfunc_pointer(uintptr_t a, uintptr_t b)
{
        uintptr_t delta = b;

        delta -= a;
        return delta;
}
597
598 static
599 uintptr_t
600 hashfunc_pointer(uintptr_t p)
601 {
602         return p;
603 }
604
605 struct hashtab *
606 hash_new(void)
607 {
608         struct hashtab *tab;
609         if (!(tab = calloc(sizeof(struct hashtab), 1)))
610                 return tab;
611         tab->hashfunc = &hashfunc_pointer;
612         tab->cmpfunc = &cmpfunc_pointer;
613         return tab;
614 }
615
/*
 * String interning table. ->id is the most recently assigned id;
 * strhash_insert() hands out ids starting from 1.
 */
struct hashtab_str {    /* string -> id map */
        struct hashtab tab;
        uint16_t id;
};
620
621 static
622 uintptr_t
623 hashfunc_string(uintptr_t p)
624 {
625         const char *str = (char *)p;
626         unsigned long hash = 5381;
627         int c;
628
629         while ((c = *str++))
630             hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
631         return hash  % NR_BUCKETS;
632 }
633
/*
 * Key comparison for string-keyed tables: both keys are pointers to
 * NUL-terminated strings. Returns 0 iff the strings are equal.
 */
static
uintptr_t
cmpfunc_string(uintptr_t a, uintptr_t b)
{
        const char *lhs = (char *)a;
        const char *rhs = (char *)b;

        return strcmp(lhs, rhs);
}
640
641
642 static
643 struct hashtab_str *
644 strhash_new(void)
645 {
646         struct hashtab_str *strtab;
647         if (!(strtab = calloc(sizeof(struct hashtab_str), 1)))
648                 return strtab;
649         strtab->tab.hashfunc = &hashfunc_string;
650         strtab->tab.cmpfunc = &cmpfunc_string;
651         return strtab;
652 }
653
654 static
655 void
656 strhash_destroy(struct hashtab_str *strtab)
657 {
658         free(strtab);
659 }
660
661 static
662 int
663 strhash_find(struct hashtab_str *strtab, const char *str, uint16_t *id)
664 {
665         uintptr_t val;
666
667         if (hash_find(&strtab->tab, (uintptr_t)str, &val))
668                 return !0;
669         *id = (uint16_t)val;
670         return 0;
671 }
672
673 static
674 int
675 strhash_insert(struct hashtab_str *strtab, const char *str, uint16_t *id)
676 {
677         uintptr_t val;
678
679         val = ++strtab->id;
680         if (strtab->id == 0) {
681                 fprintf(stderr, "too many strings\n");
682                 return ERANGE;
683         }
684         str = strdup(str);
685         if (!str) {
686                 fprintf(stderr, "out of memory\n");
687                 --strtab->id;
688                 return ENOMEM;
689         }
690         hash_insert(&strtab->tab, (uintptr_t)str, (uintptr_t)val);
691         *id = strtab->id;
692         return 0;
693 }
694
695 static
696 struct symtab *
697 symtab_new(void)
698 {
699         struct symtab *symtab;
700         if (!(symtab = calloc(sizeof(struct symtab), 1)))
701                 return symtab;
702         symtab->tab.hashfunc = &hashfunc_string;
703         symtab->tab.cmpfunc = &cmpfunc_string;
704         return symtab;
705 }
706
707 static
708 void
709 symtab_destroy(struct symtab *symtab)
710 {
711         free(symtab);
712 }
713
714 struct evtr_variable *
715 symtab_find(const struct symtab *symtab, const char *str)
716 {
717         uintptr_t val;
718
719         if (hash_find(&symtab->tab, (uintptr_t)str, &val))
720                 return NULL;
721         return (struct evtr_variable *)val;
722 }
723
724 int
725 symtab_insert(struct symtab *symtab, const char *name,
726                struct evtr_variable *var)
727 {
728         name = strdup(name);
729         if (!name) {
730                 fprintf(stderr, "out of memory\n");
731                 return ENOMEM;
732         }
733         hash_insert(&symtab->tab, (uintptr_t)name, (uintptr_t)var);
734         return 0;
735 }
736
737 static
738 int
739 evtr_filter_match(evtr_query_t q, evtr_filter_t f, evtr_event_t ev)
740 {
741         if ((f->cpu != -1) && (f->cpu != ev->cpu))
742                 return 0;
743
744         assert(!(f->flags & FILTF_ID));
745         if (ev->type != f->ev_type)
746                 return 0;
747         if (ev->type == EVTR_TYPE_PROBE) {
748                 if (f->fmt && strcmp(ev->fmt, f->fmt))
749                         return 0;
750         } else if (ev->type == EVTR_TYPE_STMT) {
751                 struct evtr_variable *var;
752                 /* resolve var */
753                 /* XXX: no need to do that *every* time */
754                 parse_var(f->var, q->symtab, &var);
755                 if (var != ev->stmt.var)
756                         return 0;
757         }
758         return !0;
759 }
760
761 static
762 int
763 evtr_match_filters(struct evtr_query *q, evtr_event_t ev)
764 {
765         int i;
766
767         /* no filters means we're interested in all events */
768         if (!q->nfilt)
769                 return !0;
770         ++q->ntried;
771         for (i = 0; i < q->nfilt; ++i) {
772                 if (evtr_filter_match(q, &q->filt[i], ev)) {
773                         ++q->nmatched;
774                         return !0;
775                 }
776         }
777         return 0;
778 }
779
780 static
781 void
782 parse_callback(evtr_event_t ev, void *d)
783 {
784         evtr_query_t q = (evtr_query_t)d;
785         if (ev->type != EVTR_TYPE_PROBE)
786                 return;
787         if (!ev->fmt || (ev->fmt[0] != '#'))
788                 return;
789         /*
790          * Copy the event to ->pending_event, then call
791          * the parser to convert it into a synthesized
792          * EVTR_TYPE_STMT event.
793          */
794         memcpy(&q->pending_event, ev, sizeof(ev));
795         parse_string(&q->pending_event, q->symtab, &ev->fmt[1]);
796         if (!evtr_match_filters(q, &q->pending_event))
797                 return;
798         /*
799          * This will cause us to return ->pending_event next time
800          * we're called.
801          */
802         q->flags |= EVTRQF_PENDING;
803 }
804
805 static
806 void
807 thread_creation_callback(evtr_event_t ev, void *d)
808 {
809         evtr_query_t q = (evtr_query_t)d;
810         evtr_t evtr = q->evtr;
811         struct evtr_thread *td;
812         void *ktd;
813         char buf[20];
814
815         if (parse_format_data(ev, "new_td %p %s", &ktd, buf) != 2) {
816                 return;
817         }
818         buf[19] = '\0';
819
820         if (!(td = malloc(sizeof(*td)))) {
821                 q->err = ENOMEM;
822                 return;
823         }
824         td->id = ktd;
825         td->userdata = NULL;
826         if (!(td->comm = strdup(buf))) {
827                 free(td);
828                 q->err = ENOMEM;
829                 return;
830         }
831         printd(DS, "inserting new thread %p: %s\n", td->id, td->comm);
832         thread_map_insert(&evtr->threads, td);
833 }
834
835 static
836 void
837 thread_switch_callback(evtr_event_t ev, void *d)
838 {
839         evtr_t evtr = ((evtr_query_t)d)->evtr;
840         struct evtr_thread *tdp, *tdn;
841         void *ktdp, *ktdn;
842         struct cpu *cpu;
843         static struct evtr_event tdcr;
844         static char *fmt = "new_td %p %s";
845         char tidstr[40];
846         char fmtdata[sizeof(void *) + sizeof(char *)];
847
848         cpu = evtr_cpu(evtr, ev->cpu);
849         if (!cpu) {
850                 printw("invalid cpu %d\n", ev->cpu);
851                 return;
852         }
853         if (parse_format_data(ev, "sw  %p > %p", &ktdp, &ktdn) != 2) {
854                 return;
855         }
856         tdp = thread_map_find(&evtr->threads, ktdp);
857         if (!tdp) {
858                 printd(DS, "switching from unknown thread %p\n", ktdp);
859         }
860         tdn = thread_map_find(&evtr->threads, ktdn);
861         if (!tdn) {
862                 /*
863                  * Fake a thread creation event for threads we
864                  * haven't seen before.
865                  */
866                 tdcr.type = EVTR_TYPE_PROBE;
867                 tdcr.ts = ev->ts;
868                 tdcr.file = NULL;
869                 tdcr.func = NULL;
870                 tdcr.line = 0;
871                 tdcr.fmt = fmt;
872                 tdcr.fmtdata = &fmtdata;
873                 tdcr.fmtdatalen = sizeof(fmtdata);
874                 tdcr.cpu = ev->cpu;
875                 tdcr.td = NULL;
876                 snprintf(tidstr, sizeof(tidstr), "%p", ktdn);
877                 ((void **)fmtdata)[0] = ktdn;
878                 ((char **)fmtdata)[1] = &tidstr[0];
879                 thread_creation_callback(&tdcr, d);
880
881                 tdn = thread_map_find(&evtr->threads, ktdn);
882                 assert(tdn != NULL);
883                 printd(DS, "switching to unknown thread %p\n", ktdn);
884                 cpu->td = tdn;
885                 return;
886         }
887         printd(DS, "cpu %d: switching to thread %p\n", ev->cpu, ktdn);
888         cpu->td = tdn;
889 }
890
891 static
892 void
893 assert_foff_in_sync(evtr_t evtr)
894 {
895         off_t off;
896
897         /*
898          * We keep our own offset because we
899          * might want to support mmap()
900          */
901         off = ftello(evtr->f);
902         if (evtr->bytes != off) {
903                 fprintf(stderr, "bytes %jd, off %jd\n", evtr->bytes, off);
904                 abort();
905         }
906 }
907
908 static
909 int
910 evtr_write(evtr_t evtr, const void *buf, size_t bytes)
911 {
912         assert_foff_in_sync(evtr);
913         if (fwrite(buf, bytes, 1, evtr->f) != 1) {
914                 evtr->err = errno;
915                 evtr->errmsg = strerror(errno);
916                 return !0;
917         }
918         evtr->bytes += bytes;
919         assert_foff_in_sync(evtr);
920         return 0;
921 }
922
923 /*
924  * Called after dumping a record to make sure the next
925  * record is REC_ALIGN aligned. This does not make much sense,
926  * as we shouldn't be using packed structs anyway.
927  */
928 static
929 int
930 evtr_dump_pad(evtr_t evtr)
931 {
932         size_t pad;
933         static char buf[REC_ALIGN];
934
935         pad = REC_ALIGN - (evtr->bytes % REC_ALIGN);
936         if (pad > 0) {
937                 return evtr_write(evtr, buf, pad);
938         }
939         return 0;
940 }
941
942 /*
943  * We make sure that there is a new record every REC_BOUNDARY
944  * bytes, this costs next to nothing in space and allows for
945  * fast seeking.
946  */
947 static
948 int
949 evtr_dump_avoid_boundary(evtr_t evtr, size_t bytes)
950 {
951         unsigned pad, i;
952         static char buf[256];
953
954         pad = REC_BOUNDARY - (evtr->bytes % REC_BOUNDARY);
955         /* if adding @bytes would cause us to cross a boundary... */
956         if (bytes > pad) {
957                 /* then pad to the boundary */
958                 for (i = 0; i < (pad / sizeof(buf)); ++i) {
959                         if (evtr_write(evtr, buf, sizeof(buf))) {
960                                 return !0;
961                         }
962                 }
963                 i = pad % sizeof(buf);
964                 if (i) {
965                         if (evtr_write(evtr, buf, i)) {
966                                 return !0;
967                         }
968                 }
969         }
970         return 0;
971 }
972
973 static
974 int
975 evtr_dump_fmt(evtr_t evtr, uint64_t ts, const evtr_event_t ev)
976 {
977         struct fmt_event_header fmt;
978         uint16_t id;
979         int err;
980         char *subsys = "", buf[1024];
981
982         if (strlcpy(buf, subsys, sizeof(buf)) >= sizeof(buf)) {
983                 evtr->errmsg = "name of subsystem is too large";
984                 evtr->err = ERANGE;
985                 return 0;
986         }
987         if (strlcat(buf, ev->fmt, sizeof(buf)) >= sizeof(buf)) {
988                 evtr->errmsg = "fmt + name of subsystem is too large";
989                 evtr->err = ERANGE;
990                 return 0;
991         }
992
993         if (!strhash_find(evtr->fmts, buf, &id)) {
994                 return id;
995         }
996         if ((err = strhash_insert(evtr->fmts, buf, &id))) {
997                 evtr->err = err;
998                 return 0;
999         }
1000
1001         fmt.eh.type = EVTR_TYPE_FMT;
1002         fmt.eh.ts = ts;
1003         fmt.subsys_len = strlen(subsys);
1004         fmt.fmt_len = strlen(ev->fmt);
1005         fmt.id = id;
1006         if (evtr_dump_avoid_boundary(evtr, sizeof(fmt) + fmt.subsys_len +
1007                                      fmt.fmt_len))
1008                 return 0;
1009         if (evtr_write(evtr, &fmt, sizeof(fmt)))
1010                 return 0;
1011         if (evtr_write(evtr, subsys, fmt.subsys_len))
1012                 return 0;
1013         if (evtr_write(evtr, ev->fmt, fmt.fmt_len))
1014                 return 0;
1015         if (evtr_dump_pad(evtr))
1016                 return 0;
1017         return fmt.id;
1018 }
1019
/*
 * Walk a printf-style format string together with the packed argument
 * data that belongs to it and rewrite every '%s' argument slot in place.
 *
 * fmt      NUL-terminated format string describing the layout of fmtdata.
 * fmtdata  packed argument bytes; every conversion advances the cursor
 *          by the size of its argument type.
 * replace  invoked as replace(ctx, old) for each '%s' slot; whatever it
 *          returns is stored back into that slot.
 * ctx      opaque pointer handed through to replace.
 *
 * Returns the number of '%s' slots rewritten, or -1 if fmt contains a
 * conversion that is not understood (a diagnostic is printed to stderr).
 *
 * No length for fmtdata is available here, so the caller has to make
 * sure the buffer covers every conversion in fmt.
 */
static
int
mangle_string_ptrs(const char *fmt, uint8_t *fmtdata,
		   const char *(*replace)(void *, const char *), void *ctx)
{
	const char *f, *p;
	size_t skipsize, intsz;
	int ret = 0;

	for (f = fmt; f[0] != '\0'; ++f) {
		if (f[0] != '%')
			continue;
		++f;
		/* "%%" is a literal percent sign and consumes no data */
		if (f[0] == '%')
			continue;
		/*
		 * Eat flags. Notice this will accept duplicate
		 * flags.
		 */
		for (p = f; p[0] != '\0' && strchr("#0- +'", p[0]) != NULL; ++p)
			;
		/* Eat minimum field width, if any */
		for (; isdigit((unsigned char)p[0]); ++p)
			;
		if (p[0] == '.')
			++p;
		/* Eat precision, if any */
		for (; isdigit((unsigned char)p[0]); ++p)
			;
		/* Length modifier decides the size of integer conversions */
		intsz = 0;
		switch (p[0]) {
		case 'l':
			if (p[1] == 'l') {
				++p;
				intsz = sizeof(long long);
			} else {
				intsz = sizeof(long);
			}
			break;
		case 'j':
			intsz = sizeof(intmax_t);
			break;
		case 't':
			intsz = sizeof(ptrdiff_t);
			break;
		case 'z':
			intsz = sizeof(size_t);
			break;
		default:
			break;
		}
		if (intsz != 0)
			++p;
		else
			intsz = sizeof(int);

		skipsize = 0;
		switch (p[0]) {
		case 'd':
		case 'i':
		case 'o':
		case 'u':
		case 'x':
		case 'X':
		case 'c':
			skipsize = intsz;
			break;
		case 'p':
			skipsize = sizeof(void *);
			break;
		case 'f':
			if (p[-1] == 'l')
				skipsize = sizeof(double);
			else
				skipsize = sizeof(float);
			break;
		case 's': {
			const char *str;

			/*
			 * The slot may sit at any byte offset inside the
			 * packed data, so copy it in and out instead of
			 * dereferencing a possibly misaligned char **.
			 */
			memcpy(&str, fmtdata, sizeof(str));
			str = replace(ctx, str);
			memcpy(fmtdata, &str, sizeof(str));
			skipsize = sizeof(char *);
			++ret;
			break;
		}
		default:
			fprintf(stderr, "Unknown conversion specifier %c "
				"in fmt starting with %s", p[0], f - 1);
			return -1;
		}
		fmtdata += skipsize;
		/* resume scanning right after the conversion character */
		f = p;
	}
	return ret;
}
1125
1126 /* XXX: do we really want the timestamp? */
1127 static
1128 int
1129 evtr_dump_string(evtr_t evtr, uint64_t ts, const char *str, int ns)
1130 {
1131         struct string_event_header s;
1132         int err;
1133         uint16_t id;
1134
1135         assert((0 <= ns) && (ns < EVTR_NS_MAX));
1136         if (!strhash_find(evtr->strings[ns], str, &id)) {
1137                 return id;
1138         }
1139         if ((err = strhash_insert(evtr->strings[ns], str, &id))) {
1140                 evtr->err = err;
1141                 return 0;
1142         }
1143
1144         printd(DS, "hash_insert %s ns %d id %d\n", str, ns, id);
1145         s.eh.type = EVTR_TYPE_STR;
1146         s.eh.ts = ts;
1147         s.ns = ns;
1148         s.id = id;
1149         s.len = strnlen(str, PATH_MAX);
1150
1151         if (evtr_dump_avoid_boundary(evtr, sizeof(s) + s.len))
1152                 return 0;
1153         if (evtr_write(evtr, &s, sizeof(s)))
1154                 return 0;
1155         if (evtr_write(evtr, str, s.len))
1156                 return 0;
1157         if (evtr_dump_pad(evtr))
1158                 return 0;
1159         return s.id;
1160 }
1161
/*
 * Context handed (as the opaque ctx pointer) to the replace callbacks
 * used with mangle_string_ptrs().
 */
struct replace_ctx {
	evtr_t evtr;	/* handle the strings are dumped to or looked up in */
	uint64_t ts;	/* timestamp passed along when a string is dumped */
};
1166
1167 static
1168 const char *
1169 replace_strptr(void *_ctx, const char *s)
1170 {
1171         struct replace_ctx *ctx = _ctx;
1172         return (const char *)(uintptr_t)evtr_dump_string(ctx->evtr, ctx->ts, s,
1173                                                          EVTR_NS_DSTR);
1174 }
1175
1176 static
1177 const char *
1178 replace_strid(void *_ctx, const char *s)
1179 {
1180         struct replace_ctx *ctx = _ctx;
1181         const char *ret;
1182
1183         ret = string_map_find(&ctx->evtr->maps[EVTR_NS_DSTR - 1].root,
1184                               (int)(uintptr_t)s);
1185         if (!ret) {
1186                 fprintf(stderr, "Unknown id for data string\n");
1187                 ctx->evtr->errmsg = "unknown id for data string";
1188                 ctx->evtr->err = !0;
1189         }
1190         validate_string(ret);
1191         printd(DS, "replacing strid %d (ns %d) with string '%s' (or int %#x)\n",
1192                (int)(uintptr_t)s, EVTR_NS_DSTR, ret ? ret : "NULL", (int)(uintptr_t)ret);
1193         return ret;
1194 }
1195
1196 static
1197 int
1198 evtr_dump_probe(evtr_t evtr, evtr_event_t ev)
1199 {
1200         struct probe_event_header kev;
1201         char buf[1024];
1202
1203         memset(&kev, '\0', sizeof(kev));
1204         kev.eh.type = ev->type;
1205         kev.eh.ts = ev->ts;
1206         kev.line = ev->line;
1207         kev.cpu = ev->cpu;
1208         if (ev->file) {
1209                 kev.file = evtr_dump_string(evtr, kev.eh.ts, ev->file,
1210                                             EVTR_NS_PATH);
1211         }
1212         if (ev->func) {
1213                 kev.func = evtr_dump_string(evtr, kev.eh.ts, ev->func,
1214                                             EVTR_NS_FUNC);
1215         }
1216         if (ev->fmt) {
1217                 kev.fmt = evtr_dump_fmt(evtr, kev.eh.ts, ev);
1218         }
1219         if (ev->fmtdata) {
1220                 struct replace_ctx replctx = {
1221                         .evtr = evtr,
1222                         .ts = ev->ts,
1223                 };
1224                 assert(ev->fmtdatalen <= (int)sizeof(buf));
1225                 kev.datalen = ev->fmtdatalen;
1226                 /*
1227                  * Replace all string pointers with string ids before dumping
1228                  * the data.
1229                  */
1230                 memcpy(buf, ev->fmtdata, ev->fmtdatalen);
1231                 if (mangle_string_ptrs(ev->fmt, buf,
1232                                        replace_strptr, &replctx) < 0)
1233                         return !0;
1234                 if (evtr->err)
1235                         return evtr->err;
1236         }
1237         if (evtr_dump_avoid_boundary(evtr, sizeof(kev) + ev->fmtdatalen))
1238                 return !0;
1239         if (evtr_write(evtr, &kev, sizeof(kev)))
1240                 return !0;
1241         if (evtr_write(evtr, buf, ev->fmtdatalen))
1242                 return !0;
1243         if (evtr_dump_pad(evtr))
1244                 return !0;
1245         return 0;
1246 }
1247
1248 static
1249 int
1250 evtr_dump_sysinfo(evtr_t evtr, evtr_event_t ev)
1251 {
1252         uint8_t type = EVTR_TYPE_SYSINFO;
1253         uint16_t ncpus = ev->ncpus;
1254
1255         if (ncpus <= 0) {
1256                 evtr->errmsg = "invalid number of cpus";
1257                 return !0;
1258         }
1259         if (evtr_dump_avoid_boundary(evtr, sizeof(type) + sizeof(ncpus)))
1260                 return !0;
1261         if (evtr_write(evtr, &type, sizeof(type))) {
1262                 return !0;
1263         }
1264         if (evtr_write(evtr, &ncpus, sizeof(ncpus))) {
1265                 return !0;
1266         }
1267         if (evtr_dump_pad(evtr))
1268                 return !0;
1269         return 0;
1270 }
1271 static
1272 int
1273 evtr_dump_cpuinfo(evtr_t evtr, evtr_event_t ev)
1274 {
1275         struct cpuinfo_event_header ci;
1276         uint8_t type;
1277
1278         if (evtr_dump_avoid_boundary(evtr, sizeof(type) + sizeof(ci)))
1279                 return !0;
1280         type = EVTR_TYPE_CPUINFO;
1281         if (evtr_write(evtr, &type, sizeof(type))) {
1282                 return !0;
1283         }
1284         ci.cpu = ev->cpu;
1285         ci.freq = ev->cpuinfo.freq;
1286         if (evtr_dump_avoid_boundary(evtr, sizeof(ci)))
1287                 return !0;
1288         if (evtr_write(evtr, &ci, sizeof(ci))) {
1289                 return !0;
1290         }
1291         if (evtr_dump_pad(evtr))
1292                 return !0;
1293         return 0;
1294 }
1295
1296 int
1297 evtr_rewind(evtr_t evtr)
1298 {
1299         assert((evtr->flags & EVTRF_WR) == 0);
1300         evtr->bytes = 0;
1301         if (fseek(evtr->f, 0, SEEK_SET)) {
1302                 evtr->err = errno;
1303                 return !0;
1304         }
1305         return 0;
1306 }
1307
1308 int
1309 evtr_dump_event(evtr_t evtr, evtr_event_t ev)
1310 {
1311         switch (ev->type) {
1312         case EVTR_TYPE_PROBE:
1313                 return evtr_dump_probe(evtr, ev);
1314         case EVTR_TYPE_SYSINFO:
1315                 return evtr_dump_sysinfo(evtr, ev);
1316         case EVTR_TYPE_CPUINFO:
1317                 return evtr_dump_cpuinfo(evtr, ev);
1318         }
1319         evtr->errmsg = "unknown event type";
1320         return !0;
1321 }
1322
1323 static
1324 evtr_t
1325 evtr_alloc(FILE *f)
1326 {
1327         evtr_t evtr;
1328         if (!(evtr = malloc(sizeof(*evtr)))) {
1329                 return NULL;
1330         }
1331
1332         evtr->f = f;
1333         evtr->err = 0;
1334         evtr->errmsg = NULL;
1335         evtr->bytes = 0;
1336         return evtr;
1337 }
1338
1339 static int evtr_next_event(evtr_t, evtr_event_t);
1340
1341 evtr_t
1342 evtr_open_read(FILE *f)
1343 {
1344         evtr_t evtr;
1345         struct evtr_event ev;
1346         int i;
1347
1348         if (!(evtr = evtr_alloc(f))) {
1349                 return NULL;
1350         }
1351         evtr->flags = 0;
1352         for (i = 0; i < (EVTR_NS_MAX - 1); ++i) {
1353                 RB_INIT(&evtr->maps[i].root);
1354         }
1355         RB_INIT(&evtr->fmtmap.root);
1356         RB_INIT(&evtr->threads.root);
1357         evtr->cpus = NULL;
1358         evtr->ncpus = 0;
1359         /*
1360          * Load the first event so we can pick up any
1361          * sysinfo entries.
1362          */
1363         if (evtr_next_event(evtr, &ev)) {
1364                 goto free_evtr;
1365         }
1366         if (evtr_rewind(evtr))
1367                 goto free_evtr;
1368         return evtr;
1369 free_evtr:
1370         free(evtr);
1371         return NULL;
1372 }
1373
1374 evtr_t
1375 evtr_open_write(FILE *f)
1376 {
1377         evtr_t evtr;
1378         int i, j;
1379
1380         if (!(evtr = evtr_alloc(f))) {
1381                 return NULL;
1382         }
1383
1384         evtr->flags = EVTRF_WR;
1385         if (!(evtr->fmts = strhash_new()))
1386                 goto free_evtr;
1387         for (i = 0; i < EVTR_NS_MAX; ++i) {
1388                 evtr->strings[i] = strhash_new();
1389                 if (!evtr->strings[i]) {
1390                         for (j = 0; j < i; ++j) {
1391                                 strhash_destroy(evtr->strings[j]);
1392                         }
1393                         goto free_fmts;
1394                 }
1395         }
1396
1397         return evtr;
1398 free_fmts:
1399         strhash_destroy(evtr->fmts);
1400 free_evtr:
1401         free(evtr);
1402         return NULL;
1403 }
1404
1405 static
1406 void
1407 hashtab_destroy(struct hashtab *h)
1408 {
1409         struct hashentry *ent, *next;
1410         int i;
1411         for (i = 0; i < NR_BUCKETS; ++i) {
1412                 for (ent = h->buckets[i]; ent; ent = next) {
1413                         next = ent->next;
1414                         free(ent);
1415                 }
1416         }
1417         free(h);
1418 }
1419
1420 void
1421 evtr_close(evtr_t evtr)
1422 {
1423         int i;
1424
1425         if (evtr->flags & EVTRF_WR) {
1426                 hashtab_destroy(&evtr->fmts->tab);
1427                 for (i = 0; i < EVTR_NS_MAX; ++i)
1428                         hashtab_destroy(&evtr->strings[i]->tab);
1429         } else {
1430                 id_tree_free(&evtr->fmtmap.root);
1431                 for (i = 0; i < EVTR_NS_MAX - 1; ++i) {
1432                         id_tree_free(&evtr->maps[i].root);
1433                 }
1434         }
1435         free(evtr);
1436 }
1437
1438 static
1439 int
1440 evtr_read(evtr_t evtr, void *buf, size_t size)
1441 {
1442         assert(size > 0);
1443         assert_foff_in_sync(evtr);
1444         printd(IO, "evtr_read at %#jx, %zd bytes\n", evtr->bytes, size);
1445         if (fread(buf, size, 1, evtr->f) != 1) {
1446                 if (feof(evtr->f)) {
1447                         evtr->errmsg = "incomplete record";
1448                 } else {
1449                         evtr->errmsg = strerror(errno);
1450                 }
1451                 return !0;
1452         }
1453         evtr->bytes += size;
1454         assert_foff_in_sync(evtr);
1455         return 0;
1456 }
1457
1458 static
1459 int
1460 evtr_load_fmt(evtr_query_t q, char *buf)
1461 {
1462         evtr_t evtr = q->evtr;
1463         struct fmt_event_header *evh = (struct fmt_event_header *)buf;
1464         struct event_fmt *fmt;
1465         char *subsys = NULL, *fmtstr;
1466
1467         if (!(fmt = malloc(sizeof(*fmt)))) {
1468                 evtr->err = errno;
1469                 return !0;
1470         }
1471         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1472                       sizeof(*evh) - sizeof(evh->eh))) {
1473                 goto free_fmt;
1474         }
1475         assert(!evh->subsys_len);
1476         if (evh->subsys_len) {
1477                 if (!(subsys = malloc(evh->subsys_len))) {
1478                         evtr->err = errno;
1479                         goto free_fmt;
1480                 }
1481                 if (evtr_read(evtr, subsys, evh->subsys_len)) {
1482                         goto free_subsys;
1483                 }
1484                 fmt->subsys = subsys;
1485         } else {
1486                 fmt->subsys = "";
1487         }
1488         if (!(fmtstr = malloc(evh->fmt_len + 1))) {
1489                 evtr->err = errno;
1490                 goto free_subsys;
1491         }
1492         if (evtr_read(evtr, fmtstr, evh->fmt_len)) {
1493                 goto free_fmtstr;
1494         }
1495         fmtstr[evh->fmt_len] = '\0';
1496         fmt->fmt = fmtstr;
1497
1498         printd(DS, "fmt_map_insert (%d, %s)\n", evh->id, fmt->fmt);
1499         evtr->err = fmt_map_insert(&evtr->fmtmap.root, fmt, evh->id);
1500         switch (evtr->err) {
1501         case ENOMEM:
1502                 evtr->errmsg = "out of memory";
1503                 break;
1504         case EEXIST:
1505                 evtr->errmsg = "redefinition of an id to a "
1506                         "different format (corrupt input)";
1507                 break;
1508         default:
1509                 ;
1510         }
1511         return evtr->err;
1512
1513 free_fmtstr:
1514         free(fmtstr);
1515 free_subsys:
1516         if (subsys)
1517                 free(subsys);
1518 free_fmt:
1519         free(fmt);
1520         return !0;
1521 }
1522
1523 static
1524 int
1525 evtr_load_string(evtr_t evtr, char *buf)
1526 {
1527         char sbuf[PATH_MAX + 1];
1528         struct string_event_header *evh = (struct string_event_header *)buf;
1529
1530         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1531                       sizeof(*evh) - sizeof(evh->eh))) {
1532                 return !0;
1533         }
1534         if (evh->len > PATH_MAX) {
1535                 evtr->errmsg = "string too large (corrupt input)";
1536                 return !0;
1537         }
1538         if (evh->len && evtr_read(evtr, sbuf, evh->len)) {
1539                 return !0;
1540         }
1541         sbuf[evh->len] = 0;
1542         if (evh->ns >= EVTR_NS_MAX) {
1543                 evtr->errmsg = "invalid namespace (corrupt input)";
1544                 return !0;
1545         }
1546         validate_string(sbuf);
1547         printd(DS, "evtr_load_string:ns %d id %d : \"%s\"\n", evh->ns, evh->id,
1548                sbuf);
1549         evtr->err = string_map_insert(&evtr->maps[evh->ns - 1].root, sbuf, evh->id);
1550         switch (evtr->err) {
1551         case ENOMEM:
1552                 evtr->errmsg = "out of memory";
1553                 break;
1554         case EEXIST:
1555                 evtr->errmsg = "redefinition of an id to a "
1556                         "different string (corrupt input)";
1557                 break;
1558         default:
1559                 ;
1560         }
1561         return 0;
1562 }
1563
1564 static
1565 int
1566 evtr_skip(evtr_t evtr, off_t bytes)
1567 {
1568         if (fseek(evtr->f, bytes, SEEK_CUR)) {
1569                 evtr->err = errno;
1570                 evtr->errmsg = strerror(errno);
1571                 return !0;
1572         }
1573         evtr->bytes += bytes;
1574         return 0;
1575 }
1576
1577 /*
1578  * Make sure q->buf is at least len bytes
1579  */
1580 static
1581 int
1582 evtr_query_reserve_buf(struct evtr_query *q, int len)
1583 {
1584         void *tmp;
1585
1586         if (q->bufsize >= len)
1587                 return 0;
1588         if (!(tmp = realloc(q->buf, len)))
1589                 return !0;
1590         q->buf = tmp;
1591         q->bufsize = len;
1592         return 0;
1593 }
1594
1595 static
1596 int
1597 evtr_load_probe(evtr_t evtr, evtr_event_t ev, char *buf, struct evtr_query *q)
1598 {
1599         struct probe_event_header *evh = (struct probe_event_header *)buf;
1600         struct cpu *cpu;
1601
1602         if (evtr_read(evtr, buf + sizeof(struct trace_event_header),
1603                       sizeof(*evh) - sizeof(evh->eh)))
1604                 return !0;
1605         memset(ev, '\0', sizeof(*ev));
1606         ev->ts = evh->eh.ts;
1607         ev->type = EVTR_TYPE_PROBE;
1608         ev->line = evh->line;
1609         ev->cpu = evh->cpu;
1610         if ((cpu = evtr_cpu(evtr, evh->cpu))) {
1611                 ev->td = cpu->td;
1612         } else {
1613                 ev->td = NULL;
1614         }
1615         if (evh->file) {
1616                 ev->file = string_map_find(
1617                         &evtr->maps[EVTR_NS_PATH - 1].root,
1618                         evh->file);
1619                 if (!ev->file) {
1620                         evtr->errmsg = "unknown id for file path";
1621                         evtr->err = !0;
1622                         ev->file = "<unknown>";
1623                 } else {
1624                         validate_string(ev->file);
1625                 }
1626         } else {
1627                 ev->file = "<unknown>";
1628         }
1629         if (evh->fmt) {
1630                 const struct event_fmt *fmt;
1631                 if (!(fmt = fmt_map_find(&evtr->fmtmap.root, evh->fmt))) {
1632                         evtr->errmsg = "unknown id for event fmt";
1633                         evtr->err = !0;
1634                         ev->fmt = NULL;
1635                 } else {
1636                         ev->fmt = fmt->fmt;
1637                         validate_string(fmt->fmt);
1638                 }
1639         }
1640         if (evh->datalen) {
1641                 if (evtr_query_reserve_buf(q, evh->datalen + 1)) {
1642                         evtr->err = ENOMEM;
1643                 } else if (!evtr_read(evtr, q->buf, evh->datalen)) {
1644                         struct replace_ctx replctx = {
1645                                 .evtr = evtr,
1646                                 .ts = ev->ts,
1647                         };
1648                         assert(ev->fmt);
1649
1650                         ev->fmtdata = q->buf;
1651                         /*
1652                          * If the format specifies any string pointers, there
1653                          * is a string id stored in the fmtdata. Look it up
1654                          * and replace it with a string pointer before
1655                          * returning it to the user.
1656                          */
1657                         if (mangle_string_ptrs(ev->fmt, __DECONST(uint8_t *,
1658                                                                   ev->fmtdata),
1659                                                replace_strid, &replctx) < 0)
1660                                 return evtr->err;
1661                         if (evtr->err)
1662                                 return evtr->err;
1663                         ((char *)ev->fmtdata)[evh->datalen] = '\0';
1664                         ev->fmtdatalen = evh->datalen;
1665                 }
1666         }
1667         evtr_run_callbacks(ev, q);
1668         return evtr->err;
1669 }
1670
1671 static
1672 int
1673 evtr_skip_to_record(evtr_t evtr)
1674 {
1675         int skip;
1676         
1677         skip = REC_ALIGN - (evtr->bytes % REC_ALIGN);
1678         if (skip > 0) {
1679                 if (fseek(evtr->f, skip, SEEK_CUR)) {
1680                         evtr->err = errno;
1681                         evtr->errmsg = strerror(errno);
1682                         return !0;
1683                 }
1684                 evtr->bytes += skip;
1685         }
1686         return 0;
1687 }
1688
1689 static
1690 int
1691 evtr_load_sysinfo(evtr_t evtr)
1692 {
1693         uint16_t ncpus;
1694         int i;
1695
1696         if (evtr_read(evtr, &ncpus, sizeof(ncpus))) {
1697                 return !0;
1698         }
1699         if (evtr->cpus)
1700                 return 0;
1701         evtr->cpus = malloc(ncpus * sizeof(struct cpu));
1702         if (!evtr->cpus) {
1703                 evtr->err = ENOMEM;
1704                 return !0;
1705         }
1706         evtr->ncpus = ncpus;
1707         for (i = 0; i < ncpus; ++i) {
1708                 evtr->cpus[i].td = NULL;
1709                 evtr->cpus[i].freq = -1.0;
1710         }
1711         return 0;
1712 }
1713
1714 static
1715 int
1716 evtr_load_cpuinfo(evtr_t evtr)
1717 {
1718         struct cpuinfo_event_header cih;
1719         struct cpu *cpu;
1720
1721         if (evtr_read(evtr, &cih, sizeof(cih))) {
1722                 return !0;
1723         }
1724         if (cih.freq < 0.0) {
1725                 evtr->errmsg = "cpu freq is negative";
1726                 evtr->err = EINVAL;
1727                 return !0;
1728         }
1729         /*
1730          * Notice that freq is merely a multiplier with
1731          * which we convert a timestamp to seconds; if
1732          * ts is not in cycles, freq is not the frequency.
1733          */
1734         if (!(cpu = evtr_cpu(evtr, cih.cpu))) {
1735                 evtr->errmsg = "freq for invalid cpu";
1736                 evtr->err = EINVAL;
1737                 return !0;
1738         }
1739         cpu->freq = cih.freq;
1740         return 0;
1741 }
1742
1743 static
1744 int
1745 _evtr_next_event(evtr_t evtr, evtr_event_t ev, struct evtr_query *q)
1746 {
1747         char buf[MAX_EVHDR_SIZE];
1748         int ret, err, ntried, nmatched;
1749         struct trace_event_header *evhdr = (struct trace_event_header *)buf;
1750
1751         for (ret = 0; !ret;) {
1752                 if (q->flags & EVTRQF_PENDING) {
1753                         q->off = evtr->bytes;
1754                         memcpy(ev, &q->pending_event, sizeof(*ev));
1755                         q->flags &= ~EVTRQF_PENDING;
1756                         return 0;
1757                 }
1758                 if (evtr_read(evtr, &evhdr->type, 1)) {
1759                         if (feof(evtr->f)) {
1760                                 evtr->errmsg = NULL;
1761                                 evtr->err = 0;
1762                                 return -1;
1763                         }
1764                         return !0;
1765                 }
1766                 /*
1767                  * skip pad records -- this will only happen if there's a
1768                  * variable sized record close to the boundary
1769                  */
1770                 if (evhdr->type == EVTR_TYPE_PAD) {
1771                         evtr_skip_to_record(evtr);
1772                         continue;
1773                 }
1774                 if (evhdr->type == EVTR_TYPE_SYSINFO) {
1775                         evtr_load_sysinfo(evtr);
1776                         continue;
1777                 } else if (evhdr->type == EVTR_TYPE_CPUINFO) {
1778                         evtr_load_cpuinfo(evtr);
1779                         continue;
1780                 }
1781                 if (evtr_read(evtr, buf + 1, sizeof(*evhdr) - 1))
1782                         return feof(evtr->f) ? -1 : !0;
1783                 switch (evhdr->type) {
1784                 case EVTR_TYPE_PROBE:
1785                         ntried = q->ntried;
1786                         nmatched = q->nmatched;
1787                         if ((err = evtr_load_probe(evtr, ev, buf, q))) {
1788                                 if (err == -1) {
1789                                         /* no match */
1790                                         ret = 0;
1791                                 } else {
1792                                         return !0;
1793                                 }
1794                         } else {
1795                                 ret = !0;
1796                         }
1797                         break;
1798                 case EVTR_TYPE_STR:
1799                         if (evtr_load_string(evtr, buf)) {
1800                                 return !0;
1801                         }
1802                         break;
1803                 case EVTR_TYPE_FMT:
1804                         if (evtr_load_fmt(q, buf)) {
1805                                 return !0;
1806                         }
1807                         break;
1808                 default:
1809                         evtr->err = !0;
1810                         evtr->errmsg = "unknown event type (corrupt input?)";
1811                         return !0;
1812                 }
1813                 evtr_skip_to_record(evtr);
1814                 if (ret) {
1815                         if (!evtr_match_filters(q, ev)) {
1816                                 ret = 0;
1817                                 continue;
1818                         }
1819                         q->off = evtr->bytes;
1820                         return 0;
1821                 }
1822         }
1823         /* can't get here */
1824         return !0;
1825 }
1826
1827 static
1828 int
1829 evtr_next_event(evtr_t evtr, evtr_event_t ev)
1830 {
1831         struct evtr_query *q;
1832         int ret;
1833
1834         if (!(q = evtr_query_init(evtr, NULL, 0))) {
1835                 evtr->err = ENOMEM;
1836                 return !0;
1837         }
1838         ret = _evtr_next_event(evtr, ev, q);
1839         evtr_query_destroy(q);
1840         return ret;
1841 }
1842
1843 int
1844 evtr_last_event(evtr_t evtr, evtr_event_t ev)
1845 {
1846         struct stat st;
1847         int fd;
1848         off_t last_boundary;
1849
1850         if (evtr_error(evtr))
1851                 return !0;
1852
1853         fd = fileno(evtr->f);
1854         if (fstat(fd, &st))
1855                 return !0;
1856         /*
1857          * This skips pseudo records, so we can't provide
1858          * an event with all fields filled in this way.
1859          * It's doable, just needs some care. TBD.
1860          */
1861         if (0 && (st.st_mode & S_IFREG)) {
1862                 /*
1863                  * Skip to last boundary, that's the closest to the EOF
1864                  * location that we are sure contains a header so we can
1865                  * pick up the stream.
1866                  */
1867                 last_boundary = (st.st_size / REC_BOUNDARY) * REC_BOUNDARY;
1868                 /* XXX: ->bytes should be in query */
1869                 assert(evtr->bytes == 0);
1870                 evtr_skip(evtr, last_boundary);
1871         }
1872
1873
1874         /*
1875          * If we can't seek, we need to go through the whole file.
1876          * Since you can't seek back, this is pretty useless unless
1877          * you really are interested only in the last event.
1878          */
1879         while (!evtr_next_event(evtr, ev))
1880                 ;
1881         if (evtr_error(evtr))
1882                 return !0;
1883         evtr_rewind(evtr);
1884         return 0;
1885 }
1886
1887 struct evtr_query *
1888 evtr_query_init(evtr_t evtr, evtr_filter_t filt, int nfilt)
1889 {
1890         struct evtr_query *q;
1891         int i;
1892
1893         if (!(q = malloc(sizeof(*q)))) {
1894                 return q;
1895         }
1896         q->bufsize = 2;
1897         if (!(q->buf = malloc(q->bufsize))) {
1898                 goto free_q;
1899         }
1900         if (!(q->symtab = symtab_new()))
1901                 goto free_buf;
1902         q->evtr = evtr;
1903         q->off = 0;
1904         q->filt = filt;
1905         q->nfilt = nfilt;
1906         TAILQ_INIT(&q->unresolved_filtq);
1907         q->nmatched = 0;
1908         q->cbs = NULL;
1909         q->ncbs = 0;
1910         q->flags = 0;
1911         memset(&q->pending_event, '\0', sizeof(q->pending_event));
1912         if (evtr_register_callback(q, &thread_creation_callback, q)) {
1913                 goto free_symtab;
1914         }
1915         if (evtr_register_callback(q, &thread_switch_callback, q)) {
1916                 goto free_cbs;
1917         }
1918         if (evtr_query_needs_parsing(q) &&
1919             evtr_register_callback(q, &parse_callback, q)) {
1920                 goto free_cbs;
1921         }
1922
1923         for (i = 0; i < nfilt; ++i) {
1924                 filt[i].flags = 0;
1925                 if (filt[i].fmt == NULL)
1926                         continue;
1927                 if (evtr_filter_register(q, &filt[i])) {
1928                         evtr_deregister_filters(q, filt, i);
1929                         goto free_symtab;
1930                 }
1931         }
1932
1933         return q;
1934 free_cbs:
1935         evtr_deregister_callbacks(q);
1936 free_symtab:
1937         symtab_destroy(q->symtab);
1938 free_buf:
1939         free(q->buf);
1940 free_q:
1941         free(q);
1942         return NULL;
1943 }
1944
1945 void
1946 evtr_query_destroy(struct evtr_query *q)
1947 {
1948         evtr_deregister_filters(q, q->filt, q->nfilt);
1949                 
1950         free(q->buf);
1951         free(q);
1952 }
1953
1954 int
1955 evtr_query_next(struct evtr_query *q, evtr_event_t ev)
1956 {
1957         if (evtr_query_error(q))
1958                 return !0;
1959         /* we may support that in the future */
1960         if (q->off != q->evtr->bytes) {
1961                 q->errmsg = "evtr/query offset mismatch";
1962                 return !0;
1963         }
1964         return _evtr_next_event(q->evtr, ev, q);
1965 }
1966
1967 int
1968 evtr_ncpus(evtr_t evtr)
1969 {
1970         return evtr->ncpus;
1971 }
1972
1973 int
1974 evtr_cpufreqs(evtr_t evtr, double *freqs)
1975 {
1976         int i;
1977
1978         if (!freqs)
1979                 return EINVAL;
1980         for (i = 0; i < evtr->ncpus; ++i) {
1981                 freqs[i] = evtr->cpus[i].freq;
1982         }
1983         return 0;
1984 }