| Commit | Line | Data |
|---|---|---|
| 32c903ac | 1 | /* $Id: chars.c,v 1.12 2009/11/01 07:44:32 kristaps Exp $ */ |
| 589e7c1d SW |
2 | /* |
| 3 | * Copyright (c) 2009 Kristaps Dzonsons <kristaps@kth.se> | |
| 4 | * | |
| 5 | * Permission to use, copy, modify, and distribute this software for any | |
| 6 | * purpose with or without fee is hereby granted, provided that the above | |
| 7 | * copyright notice and this permission notice appear in all copies. | |
| 8 | * | |
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
| 16 | */ | |
| 17 | #include <assert.h> | |
| 32c903ac | 18 | #include <stdio.h> |
| 589e7c1d SW |
19 | #include <stdlib.h> |
| 20 | #include <string.h> | |
| 21 | ||
| 22 | #include "chars.h" | |
| 23 | ||
| 24 | #define PRINT_HI 126 | |
| 25 | #define PRINT_LO 32 | |
| 26 | ||
| 27 | struct ln { | |
| 28 | struct ln *next; | |
| 29 | const char *code; | |
| 30 | const char *ascii; | |
| 31 | const char *html; | |
| 32 | size_t codesz; | |
| 33 | size_t asciisz; | |
| 34 | size_t htmlsz; | |
| 35 | int type; | |
| 36 | #define CHARS_CHAR (1 << 0) | |
| 37 | #define CHARS_STRING (1 << 1) | |
| 32c903ac | 38 | #define CHARS_BOTH (CHARS_CHAR | CHARS_STRING) |
| 589e7c1d SW |
39 | }; |
| 40 | ||
| 41 | #define LINES_MAX 351 | |
| 42 | ||
| 43 | #define CHAR(w, x, y, z, a, b) \ | |
| 44 | { NULL, (w), (y), (a), (x), (z), (b), CHARS_CHAR }, | |
| 45 | #define STRING(w, x, y, z, a, b) \ | |
| 46 | { NULL, (w), (y), (a), (x), (z), (b), CHARS_STRING }, | |
| 47 | #define BOTH(w, x, y, z, a, b) \ | |
| 48 | { NULL, (w), (y), (a), (x), (z), (b), CHARS_BOTH }, | |
| 49 | ||
| 50 | static struct ln lines[LINES_MAX] = { | |
| 51 | #include "chars.in" | |
| 52 | }; | |
| 53 | ||
| 54 | struct tbl { | |
| 55 | enum chars type; | |
| 56 | struct ln **htab; | |
| 57 | }; | |
| 58 | ||
| 59 | static inline int match(const struct ln *, | |
| 60 | const char *, size_t, int); | |
| 61 | static const char *find(struct tbl *, const char *, | |
| 62 | size_t, size_t *, int); | |
| 63 | ||
| 64 | ||
| 65 | void | |
| 66 | chars_free(void *arg) | |
| 67 | { | |
| 68 | struct tbl *tab; | |
| 69 | ||
| 70 | tab = (struct tbl *)arg; | |
| 71 | ||
| 72 | free(tab->htab); | |
| 73 | free(tab); | |
| 74 | } | |
| 75 | ||
| 76 | ||
| 77 | void * | |
| 78 | chars_init(enum chars type) | |
| 79 | { | |
| 80 | struct tbl *tab; | |
| 81 | struct ln **htab; | |
| 82 | struct ln *pp; | |
| 83 | int i, hash; | |
| 84 | ||
| 85 | /* | |
| 86 | * Constructs a very basic chaining hashtable. The hash routine | |
| 87 | * is simply the integral value of the first character. | |
| 88 | * Subsequent entries are chained in the order they're processed | |
| 89 | * (they're in-line re-ordered during lookup). | |
| 90 | */ | |
| 91 | ||
| 32c903ac SW |
92 | tab = malloc(sizeof(struct tbl)); |
| 93 | if (NULL == tab) { | |
| 94 | perror(NULL); | |
| 95 | exit(EXIT_FAILURE); | |
| 96 | } | |
| 589e7c1d SW |
97 | |
| 98 | htab = calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); | |
| 32c903ac SW |
99 | if (NULL == htab) { |
| 100 | perror(NULL); | |
| 101 | exit(EXIT_FAILURE); | |
| 102 | } | |
| 589e7c1d SW |
103 | |
| 104 | for (i = 0; i < LINES_MAX; i++) { | |
| 105 | hash = (int)lines[i].code[0] - PRINT_LO; | |
| 106 | ||
| 107 | if (NULL == (pp = htab[hash])) { | |
| 108 | htab[hash] = &lines[i]; | |
| 109 | continue; | |
| 110 | } | |
| 111 | ||
| 112 | for ( ; pp->next; pp = pp->next) | |
| 113 | /* Scan ahead. */ ; | |
| 114 | pp->next = &lines[i]; | |
| 115 | } | |
| 116 | ||
| 117 | tab->htab = htab; | |
| 32c903ac | 118 | tab->type = type; |
| 589e7c1d SW |
119 | return(tab); |
| 120 | } | |
| 121 | ||
| 122 | ||
| 123 | const char * | |
| 124 | chars_a2ascii(void *arg, const char *p, size_t sz, size_t *rsz) | |
| 125 | { | |
| 126 | ||
| 127 | return(find((struct tbl *)arg, p, sz, rsz, CHARS_CHAR)); | |
| 128 | } | |
| 129 | ||
| 130 | ||
| 131 | const char * | |
| 132 | chars_a2res(void *arg, const char *p, size_t sz, size_t *rsz) | |
| 133 | { | |
| 134 | ||
| 135 | return(find((struct tbl *)arg, p, sz, rsz, CHARS_STRING)); | |
| 136 | } | |
| 137 | ||
| 138 | ||
| 139 | static const char * | |
| 140 | find(struct tbl *tab, const char *p, size_t sz, size_t *rsz, int type) | |
| 141 | { | |
| 142 | struct ln *pp, *prev; | |
| 143 | struct ln **htab; | |
| 144 | int hash; | |
| 145 | ||
| 146 | assert(p); | |
| 147 | assert(sz > 0); | |
| 148 | ||
| 149 | if (p[0] < PRINT_LO || p[0] > PRINT_HI) | |
| 150 | return(NULL); | |
| 151 | ||
| 152 | /* | |
| 153 | * Lookup the symbol in the symbol hash. See ascii2htab for the | |
| 154 | * hashtable specs. This dynamically re-orders the hash chain | |
| 155 | * to optimise for repeat hits. | |
| 156 | */ | |
| 157 | ||
| 158 | hash = (int)p[0] - PRINT_LO; | |
| 159 | htab = tab->htab; | |
| 160 | ||
| 161 | if (NULL == (pp = htab[hash])) | |
| 162 | return(NULL); | |
| 163 | ||
| 164 | if (NULL == pp->next) { | |
| 165 | if ( ! match(pp, p, sz, type)) | |
| 166 | return(NULL); | |
| 167 | ||
| 168 | if (CHARS_HTML == tab->type) { | |
| 169 | *rsz = pp->htmlsz; | |
| 170 | return(pp->html); | |
| 171 | } | |
| 172 | *rsz = pp->asciisz; | |
| 173 | return(pp->ascii); | |
| 174 | } | |
| 175 | ||
| 176 | for (prev = NULL; pp; pp = pp->next) { | |
| 177 | if ( ! match(pp, p, sz, type)) { | |
| 178 | prev = pp; | |
| 179 | continue; | |
| 180 | } | |
| 181 | ||
| 182 | if (prev) { | |
| 183 | prev->next = pp->next; | |
| 184 | pp->next = htab[hash]; | |
| 185 | htab[hash] = pp; | |
| 186 | } | |
| 187 | ||
| 188 | if (CHARS_HTML == tab->type) { | |
| 189 | *rsz = pp->htmlsz; | |
| 190 | return(pp->html); | |
| 191 | } | |
| 192 | *rsz = pp->asciisz; | |
| 193 | return(pp->ascii); | |
| 194 | } | |
| 195 | ||
| 196 | return(NULL); | |
| 197 | } | |
| 198 | ||
| 199 | ||
| 200 | static inline int | |
| 201 | match(const struct ln *ln, const char *p, size_t sz, int type) | |
| 202 | { | |
| 203 | ||
| 204 | if ( ! (ln->type & type)) | |
| 205 | return(0); | |
| 206 | if (ln->codesz != sz) | |
| 207 | return(0); | |
| 208 | return(0 == strncmp(ln->code, p, sz)); | |
| 209 | } |