Fix hangs with processes stuck sleeping on btalloc on i386.
[freebsd.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / zap_leaf.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
25  * Copyright 2017 Nexenta Systems, Inc.
26  */
27
28 /*
29  * The 512-byte leaf is broken into 32 16-byte chunks.
30  * chunk number n means l_chunk[n], even though the header precedes it.
31  * the names are stored null-terminated.
32  */
33
34 #include <sys/zio.h>
35 #include <sys/spa.h>
36 #include <sys/dmu.h>
37 #include <sys/zfs_context.h>
38 #include <sys/fs/zfs.h>
39 #include <sys/zap.h>
40 #include <sys/zap_impl.h>
41 #include <sys/zap_leaf.h>
42 #include <sys/arc.h>
43
44 static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
45
46 #define CHAIN_END 0xffff /* end of the chunk chain */
47
48 /* half the (current) minimum block size */
49 #define MAX_ARRAY_BYTES (8<<10)
50
51 #define LEAF_HASH(l, h) \
52         ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
53         ((h) >> \
54         (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
55
56 #define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
57
58 extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
59
60 static void
61 zap_memset(void *a, int c, size_t n)
62 {
63         char *cp = a;
64         char *cpend = cp + n;
65
66         while (cp < cpend)
67                 *cp++ = c;
68 }
69
70 static void
71 stv(int len, void *addr, uint64_t value)
72 {
73         switch (len) {
74         case 1:
75                 *(uint8_t *)addr = value;
76                 return;
77         case 2:
78                 *(uint16_t *)addr = value;
79                 return;
80         case 4:
81                 *(uint32_t *)addr = value;
82                 return;
83         case 8:
84                 *(uint64_t *)addr = value;
85                 return;
86         }
87         ASSERT(!"bad int len");
88 }
89
90 static uint64_t
91 ldv(int len, const void *addr)
92 {
93         switch (len) {
94         case 1:
95                 return (*(uint8_t *)addr);
96         case 2:
97                 return (*(uint16_t *)addr);
98         case 4:
99                 return (*(uint32_t *)addr);
100         case 8:
101                 return (*(uint64_t *)addr);
102         }
103         ASSERT(!"bad int len");
104         return (0xFEEDFACEDEADBEEFULL);
105 }
106
107 void
108 zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
109 {
110         zap_leaf_t l;
111         dmu_buf_t l_dbuf;
112
113         l_dbuf.db_data = buf;
114         l.l_bs = highbit64(size) - 1;
115         l.l_dbuf = &l_dbuf;
116
117         buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
118         buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
119         buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
120         buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
121         buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
122         buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
123         buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);
124
125         for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
126                 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
127
128         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
129                 zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
130                 struct zap_leaf_entry *le;
131
132                 switch (lc->l_free.lf_type) {
133                 case ZAP_CHUNK_ENTRY:
134                         le = &lc->l_entry;
135
136                         le->le_type =           BSWAP_8(le->le_type);
137                         le->le_value_intlen =   BSWAP_8(le->le_value_intlen);
138                         le->le_next =           BSWAP_16(le->le_next);
139                         le->le_name_chunk =     BSWAP_16(le->le_name_chunk);
140                         le->le_name_numints =   BSWAP_16(le->le_name_numints);
141                         le->le_value_chunk =    BSWAP_16(le->le_value_chunk);
142                         le->le_value_numints =  BSWAP_16(le->le_value_numints);
143                         le->le_cd =             BSWAP_32(le->le_cd);
144                         le->le_hash =           BSWAP_64(le->le_hash);
145                         break;
146                 case ZAP_CHUNK_FREE:
147                         lc->l_free.lf_type =    BSWAP_8(lc->l_free.lf_type);
148                         lc->l_free.lf_next =    BSWAP_16(lc->l_free.lf_next);
149                         break;
150                 case ZAP_CHUNK_ARRAY:
151                         lc->l_array.la_type =   BSWAP_8(lc->l_array.la_type);
152                         lc->l_array.la_next =   BSWAP_16(lc->l_array.la_next);
153                         /* la_array doesn't need swapping */
154                         break;
155                 default:
156                         ASSERT(!"bad leaf type");
157                 }
158         }
159 }
160
161 void
162 zap_leaf_init(zap_leaf_t *l, boolean_t sort)
163 {
164         l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
165         zap_memset(&zap_leaf_phys(l)->l_hdr, 0,
166             sizeof (struct zap_leaf_header));
167         zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
168             2*ZAP_LEAF_HASH_NUMENTRIES(l));
169         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
170                 ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
171                 ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
172         }
173         ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
174         zap_leaf_phys(l)->l_hdr.lh_block_type = ZBT_LEAF;
175         zap_leaf_phys(l)->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
176         zap_leaf_phys(l)->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
177         if (sort)
178                 zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
179 }
180
181 /*
182  * Routines which manipulate leaf chunks (l_chunk[]).
183  */
184
185 static uint16_t
186 zap_leaf_chunk_alloc(zap_leaf_t *l)
187 {
188         ASSERT(zap_leaf_phys(l)->l_hdr.lh_nfree > 0);
189
190         int chunk = zap_leaf_phys(l)->l_hdr.lh_freelist;
191         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
192         ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
193
194         zap_leaf_phys(l)->l_hdr.lh_freelist =
195             ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
196
197         zap_leaf_phys(l)->l_hdr.lh_nfree--;
198
199         return (chunk);
200 }
201
202 static void
203 zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
204 {
205         struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
206         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
207         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
208         ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
209
210         zlf->lf_type = ZAP_CHUNK_FREE;
211         zlf->lf_next = zap_leaf_phys(l)->l_hdr.lh_freelist;
212         bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
213         zap_leaf_phys(l)->l_hdr.lh_freelist = chunk;
214
215         zap_leaf_phys(l)->l_hdr.lh_nfree++;
216 }
217
218 /*
219  * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
220  */
221
222 static uint16_t
223 zap_leaf_array_create(zap_leaf_t *l, const char *buf,
224     int integer_size, int num_integers)
225 {
226         uint16_t chunk_head;
227         uint16_t *chunkp = &chunk_head;
228         int byten = 0;
229         uint64_t value = 0;
230         int shift = (integer_size - 1) * 8;
231         int len = num_integers;
232
233         ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);
234
235         while (len > 0) {
236                 uint16_t chunk = zap_leaf_chunk_alloc(l);
237                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
238
239                 la->la_type = ZAP_CHUNK_ARRAY;
240                 for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
241                         if (byten == 0)
242                                 value = ldv(integer_size, buf);
243                         la->la_array[i] = value >> shift;
244                         value <<= 8;
245                         if (++byten == integer_size) {
246                                 byten = 0;
247                                 buf += integer_size;
248                                 if (--len == 0)
249                                         break;
250                         }
251                 }
252
253                 *chunkp = chunk;
254                 chunkp = &la->la_next;
255         }
256         *chunkp = CHAIN_END;
257
258         return (chunk_head);
259 }
260
261 static void
262 zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
263 {
264         uint16_t chunk = *chunkp;
265
266         *chunkp = CHAIN_END;
267
268         while (chunk != CHAIN_END) {
269                 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
270                 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
271                     ZAP_CHUNK_ARRAY);
272                 zap_leaf_chunk_free(l, chunk);
273                 chunk = nextchunk;
274         }
275 }
276
277 /* array_len and buf_len are in integers, not bytes */
278 static void
279 zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
280     int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
281     void *buf)
282 {
283         int len = MIN(array_len, buf_len);
284         int byten = 0;
285         uint64_t value = 0;
286         char *p = buf;
287
288         ASSERT3U(array_int_len, <=, buf_int_len);
289
290         /* Fast path for one 8-byte integer */
291         if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
292                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
293                 uint8_t *ip = la->la_array;
294                 uint64_t *buf64 = buf;
295
296                 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
297                     (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
298                     (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
299                     (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
300                 return;
301         }
302
303         /* Fast path for an array of 1-byte integers (eg. the entry name) */
304         if (array_int_len == 1 && buf_int_len == 1 &&
305             buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
306                 while (chunk != CHAIN_END) {
307                         struct zap_leaf_array *la =
308                             &ZAP_LEAF_CHUNK(l, chunk).l_array;
309                         bcopy(la->la_array, p, ZAP_LEAF_ARRAY_BYTES);
310                         p += ZAP_LEAF_ARRAY_BYTES;
311                         chunk = la->la_next;
312                 }
313                 return;
314         }
315
316         while (len > 0) {
317                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
318
319                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
320                 for (int i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
321                         value = (value << 8) | la->la_array[i];
322                         byten++;
323                         if (byten == array_int_len) {
324                                 stv(buf_int_len, p, value);
325                                 byten = 0;
326                                 len--;
327                                 if (len == 0)
328                                         return;
329                                 p += buf_int_len;
330                         }
331                 }
332                 chunk = la->la_next;
333         }
334 }
335
336 static boolean_t
337 zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
338     int chunk, int array_numints)
339 {
340         int bseen = 0;
341
342         if (zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY) {
343                 uint64_t *thiskey =
344                     kmem_alloc(array_numints * sizeof (*thiskey), KM_SLEEP);
345                 ASSERT(zn->zn_key_intlen == sizeof (*thiskey));
346
347                 zap_leaf_array_read(l, chunk, sizeof (*thiskey), array_numints,
348                     sizeof (*thiskey), array_numints, thiskey);
349                 boolean_t match = bcmp(thiskey, zn->zn_key_orig,
350                     array_numints * sizeof (*thiskey)) == 0;
351                 kmem_free(thiskey, array_numints * sizeof (*thiskey));
352                 return (match);
353         }
354
355         ASSERT(zn->zn_key_intlen == 1);
356         if (zn->zn_matchtype & MT_NORMALIZE) {
357                 char *thisname = kmem_alloc(array_numints, KM_SLEEP);
358
359                 zap_leaf_array_read(l, chunk, sizeof (char), array_numints,
360                     sizeof (char), array_numints, thisname);
361                 boolean_t match = zap_match(zn, thisname);
362                 kmem_free(thisname, array_numints);
363                 return (match);
364         }
365
366         /*
367          * Fast path for exact matching.
368          * First check that the lengths match, so that we don't read
369          * past the end of the zn_key_orig array.
370          */
371         if (array_numints != zn->zn_key_orig_numints)
372                 return (B_FALSE);
373         while (bseen < array_numints) {
374                 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
375                 int toread = MIN(array_numints - bseen, ZAP_LEAF_ARRAY_BYTES);
376                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
377                 if (bcmp(la->la_array, (char *)zn->zn_key_orig + bseen, toread))
378                         break;
379                 chunk = la->la_next;
380                 bseen += toread;
381         }
382         return (bseen == array_numints);
383 }
384
385 /*
386  * Routines which manipulate leaf entries.
387  */
388
389 int
390 zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
391 {
392         struct zap_leaf_entry *le;
393
394         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
395
396         for (uint16_t *chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
397             *chunkp != CHAIN_END; chunkp = &le->le_next) {
398                 uint16_t chunk = *chunkp;
399                 le = ZAP_LEAF_ENTRY(l, chunk);
400
401                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
402                 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
403
404                 if (le->le_hash != zn->zn_hash)
405                         continue;
406
407                 /*
408                  * NB: the entry chain is always sorted by cd on
409                  * normalized zap objects, so this will find the
410                  * lowest-cd match for MT_NORMALIZE.
411                  */
412                 ASSERT((zn->zn_matchtype == 0) ||
413                     (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
414                 if (zap_leaf_array_match(l, zn, le->le_name_chunk,
415                     le->le_name_numints)) {
416                         zeh->zeh_num_integers = le->le_value_numints;
417                         zeh->zeh_integer_size = le->le_value_intlen;
418                         zeh->zeh_cd = le->le_cd;
419                         zeh->zeh_hash = le->le_hash;
420                         zeh->zeh_chunkp = chunkp;
421                         zeh->zeh_leaf = l;
422                         return (0);
423                 }
424         }
425
426         return (SET_ERROR(ENOENT));
427 }
428
429 /* Return (h1,cd1 >= h2,cd2) */
430 #define HCD_GTEQ(h1, cd1, h2, cd2) \
431         ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
432
433 int
434 zap_leaf_lookup_closest(zap_leaf_t *l,
435     uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
436 {
437         uint64_t besth = -1ULL;
438         uint32_t bestcd = -1U;
439         uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
440         struct zap_leaf_entry *le;
441
442         ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
443
444         for (uint16_t lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
445                 for (uint16_t chunk = zap_leaf_phys(l)->l_hash[lh];
446                     chunk != CHAIN_END; chunk = le->le_next) {
447                         le = ZAP_LEAF_ENTRY(l, chunk);
448
449                         ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
450                         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
451
452                         if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
453                             HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
454                                 ASSERT3U(bestlh, >=, lh);
455                                 bestlh = lh;
456                                 besth = le->le_hash;
457                                 bestcd = le->le_cd;
458
459                                 zeh->zeh_num_integers = le->le_value_numints;
460                                 zeh->zeh_integer_size = le->le_value_intlen;
461                                 zeh->zeh_cd = le->le_cd;
462                                 zeh->zeh_hash = le->le_hash;
463                                 zeh->zeh_fakechunk = chunk;
464                                 zeh->zeh_chunkp = &zeh->zeh_fakechunk;
465                                 zeh->zeh_leaf = l;
466                         }
467                 }
468         }
469
470         return (bestcd == -1U ? ENOENT : 0);
471 }
472
473 int
474 zap_entry_read(const zap_entry_handle_t *zeh,
475     uint8_t integer_size, uint64_t num_integers, void *buf)
476 {
477         struct zap_leaf_entry *le =
478             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
479         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
480
481         if (le->le_value_intlen > integer_size)
482                 return (SET_ERROR(EINVAL));
483
484         zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk,
485             le->le_value_intlen, le->le_value_numints,
486             integer_size, num_integers, buf);
487
488         if (zeh->zeh_num_integers > num_integers)
489                 return (SET_ERROR(EOVERFLOW));
490         return (0);
491
492 }
493
494 int
495 zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
496     char *buf)
497 {
498         struct zap_leaf_entry *le =
499             ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
500         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
501
502         if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
503                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 8,
504                     le->le_name_numints, 8, buflen / 8, buf);
505         } else {
506                 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
507                     le->le_name_numints, 1, buflen, buf);
508         }
509         if (le->le_name_numints > buflen)
510                 return (SET_ERROR(EOVERFLOW));
511         return (0);
512 }
513
514 int
515 zap_entry_update(zap_entry_handle_t *zeh,
516     uint8_t integer_size, uint64_t num_integers, const void *buf)
517 {
518         zap_leaf_t *l = zeh->zeh_leaf;
519         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
520
521         int delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
522             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints * le->le_value_intlen);
523
524         if ((int)zap_leaf_phys(l)->l_hdr.lh_nfree < delta_chunks)
525                 return (SET_ERROR(EAGAIN));
526
527         zap_leaf_array_free(l, &le->le_value_chunk);
528         le->le_value_chunk =
529             zap_leaf_array_create(l, buf, integer_size, num_integers);
530         le->le_value_numints = num_integers;
531         le->le_value_intlen = integer_size;
532         return (0);
533 }
534
535 void
536 zap_entry_remove(zap_entry_handle_t *zeh)
537 {
538         zap_leaf_t *l = zeh->zeh_leaf;
539
540         ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
541
542         uint16_t entry_chunk = *zeh->zeh_chunkp;
543         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry_chunk);
544         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
545
546         zap_leaf_array_free(l, &le->le_name_chunk);
547         zap_leaf_array_free(l, &le->le_value_chunk);
548
549         *zeh->zeh_chunkp = le->le_next;
550         zap_leaf_chunk_free(l, entry_chunk);
551
552         zap_leaf_phys(l)->l_hdr.lh_nentries--;
553 }
554
555 int
556 zap_entry_create(zap_leaf_t *l, zap_name_t *zn, uint32_t cd,
557     uint8_t integer_size, uint64_t num_integers, const void *buf,
558     zap_entry_handle_t *zeh)
559 {
560         uint16_t chunk;
561         struct zap_leaf_entry *le;
562         uint64_t h = zn->zn_hash;
563
564         uint64_t valuelen = integer_size * num_integers;
565
566         int numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(zn->zn_key_orig_numints *
567             zn->zn_key_intlen) + ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
568         if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
569                 return (E2BIG);
570
571         if (cd == ZAP_NEED_CD) {
572                 /* find the lowest unused cd */
573                 if (zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
574                         cd = 0;
575
576                         for (chunk = *LEAF_HASH_ENTPTR(l, h);
577                             chunk != CHAIN_END; chunk = le->le_next) {
578                                 le = ZAP_LEAF_ENTRY(l, chunk);
579                                 if (le->le_cd > cd)
580                                         break;
581                                 if (le->le_hash == h) {
582                                         ASSERT3U(cd, ==, le->le_cd);
583                                         cd++;
584                                 }
585                         }
586                 } else {
587                         /* old unsorted format; do it the O(n^2) way */
588                         for (cd = 0; ; cd++) {
589                                 for (chunk = *LEAF_HASH_ENTPTR(l, h);
590                                     chunk != CHAIN_END; chunk = le->le_next) {
591                                         le = ZAP_LEAF_ENTRY(l, chunk);
592                                         if (le->le_hash == h &&
593                                             le->le_cd == cd) {
594                                                 break;
595                                         }
596                                 }
597                                 /* If this cd is not in use, we are good. */
598                                 if (chunk == CHAIN_END)
599                                         break;
600                         }
601                 }
602                 /*
603                  * We would run out of space in a block before we could
604                  * store enough entries to run out of CD values.
605                  */
606                 ASSERT3U(cd, <, zap_maxcd(zn->zn_zap));
607         }
608
609         if (zap_leaf_phys(l)->l_hdr.lh_nfree < numchunks)
610                 return (SET_ERROR(EAGAIN));
611
612         /* make the entry */
613         chunk = zap_leaf_chunk_alloc(l);
614         le = ZAP_LEAF_ENTRY(l, chunk);
615         le->le_type = ZAP_CHUNK_ENTRY;
616         le->le_name_chunk = zap_leaf_array_create(l, zn->zn_key_orig,
617             zn->zn_key_intlen, zn->zn_key_orig_numints);
618         le->le_name_numints = zn->zn_key_orig_numints;
619         le->le_value_chunk =
620             zap_leaf_array_create(l, buf, integer_size, num_integers);
621         le->le_value_numints = num_integers;
622         le->le_value_intlen = integer_size;
623         le->le_hash = h;
624         le->le_cd = cd;
625
626         /* link it into the hash chain */
627         /* XXX if we did the search above, we could just use that */
628         uint16_t *chunkp = zap_leaf_rehash_entry(l, chunk);
629
630         zap_leaf_phys(l)->l_hdr.lh_nentries++;
631
632         zeh->zeh_leaf = l;
633         zeh->zeh_num_integers = num_integers;
634         zeh->zeh_integer_size = le->le_value_intlen;
635         zeh->zeh_cd = le->le_cd;
636         zeh->zeh_hash = le->le_hash;
637         zeh->zeh_chunkp = chunkp;
638
639         return (0);
640 }
641
642 /*
643  * Determine if there is another entry with the same normalized form.
644  * For performance purposes, either zn or name must be provided (the
645  * other can be NULL).  Note, there usually won't be any hash
646  * conflicts, in which case we don't need the concatenated/normalized
647  * form of the name.  But all callers have one of these on hand anyway,
648  * so might as well take advantage.  A cleaner but slower interface
649  * would accept neither argument, and compute the normalized name as
650  * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
651  */
652 boolean_t
653 zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
654     const char *name, zap_t *zap)
655 {
656         struct zap_leaf_entry *le;
657         boolean_t allocdzn = B_FALSE;
658
659         if (zap->zap_normflags == 0)
660                 return (B_FALSE);
661
662         for (uint16_t chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
663             chunk != CHAIN_END; chunk = le->le_next) {
664                 le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
665                 if (le->le_hash != zeh->zeh_hash)
666                         continue;
667                 if (le->le_cd == zeh->zeh_cd)
668                         continue;
669
670                 if (zn == NULL) {
671                         zn = zap_name_alloc(zap, name, MT_NORMALIZE);
672                         allocdzn = B_TRUE;
673                 }
674                 if (zap_leaf_array_match(zeh->zeh_leaf, zn,
675                     le->le_name_chunk, le->le_name_numints)) {
676                         if (allocdzn)
677                                 zap_name_free(zn);
678                         return (B_TRUE);
679                 }
680         }
681         if (allocdzn)
682                 zap_name_free(zn);
683         return (B_FALSE);
684 }
685
686 /*
687  * Routines for transferring entries between leafs.
688  */
689
690 static uint16_t *
691 zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
692 {
693         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
694         struct zap_leaf_entry *le2;
695         uint16_t *chunkp;
696
697         /*
698          * keep the entry chain sorted by cd
699          * NB: this will not cause problems for unsorted leafs, though
700          * it is unnecessary there.
701          */
702         for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
703             *chunkp != CHAIN_END; chunkp = &le2->le_next) {
704                 le2 = ZAP_LEAF_ENTRY(l, *chunkp);
705                 if (le2->le_cd > le->le_cd)
706                         break;
707         }
708
709         le->le_next = *chunkp;
710         *chunkp = entry;
711         return (chunkp);
712 }
713
714 static uint16_t
715 zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
716 {
717         uint16_t new_chunk;
718         uint16_t *nchunkp = &new_chunk;
719
720         while (chunk != CHAIN_END) {
721                 uint16_t nchunk = zap_leaf_chunk_alloc(nl);
722                 struct zap_leaf_array *nla =
723                     &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
724                 struct zap_leaf_array *la =
725                     &ZAP_LEAF_CHUNK(l, chunk).l_array;
726                 int nextchunk = la->la_next;
727
728                 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
729                 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
730
731                 *nla = *la; /* structure assignment */
732
733                 zap_leaf_chunk_free(l, chunk);
734                 chunk = nextchunk;
735                 *nchunkp = nchunk;
736                 nchunkp = &nla->la_next;
737         }
738         *nchunkp = CHAIN_END;
739         return (new_chunk);
740 }
741
742 static void
743 zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
744 {
745         struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
746         ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
747
748         uint16_t chunk = zap_leaf_chunk_alloc(nl);
749         struct zap_leaf_entry *nle = ZAP_LEAF_ENTRY(nl, chunk);
750         *nle = *le; /* structure assignment */
751
752         (void) zap_leaf_rehash_entry(nl, chunk);
753
754         nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
755         nle->le_value_chunk =
756             zap_leaf_transfer_array(l, le->le_value_chunk, nl);
757
758         zap_leaf_chunk_free(l, entry);
759
760         zap_leaf_phys(l)->l_hdr.lh_nentries--;
761         zap_leaf_phys(nl)->l_hdr.lh_nentries++;
762 }
763
764 /*
765  * Transfer the entries whose hash prefix ends in 1 to the new leaf.
766  */
767 void
768 zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
769 {
770         int bit = 64 - 1 - zap_leaf_phys(l)->l_hdr.lh_prefix_len;
771
772         /* set new prefix and prefix_len */
773         zap_leaf_phys(l)->l_hdr.lh_prefix <<= 1;
774         zap_leaf_phys(l)->l_hdr.lh_prefix_len++;
775         zap_leaf_phys(nl)->l_hdr.lh_prefix =
776             zap_leaf_phys(l)->l_hdr.lh_prefix | 1;
777         zap_leaf_phys(nl)->l_hdr.lh_prefix_len =
778             zap_leaf_phys(l)->l_hdr.lh_prefix_len;
779
780         /* break existing hash chains */
781         zap_memset(zap_leaf_phys(l)->l_hash, CHAIN_END,
782             2*ZAP_LEAF_HASH_NUMENTRIES(l));
783
784         if (sort)
785                 zap_leaf_phys(l)->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
786
787         /*
788          * Transfer entries whose hash bit 'bit' is set to nl; rehash
789          * the remaining entries
790          *
791          * NB: We could find entries via the hashtable instead. That
792          * would be O(hashents+numents) rather than O(numblks+numents),
793          * but this accesses memory more sequentially, and when we're
794          * called, the block is usually pretty full.
795          */
796         for (int i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
797                 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
798                 if (le->le_type != ZAP_CHUNK_ENTRY)
799                         continue;
800
801                 if (le->le_hash & (1ULL << bit))
802                         zap_leaf_transfer_entry(l, i, nl);
803                 else
804                         (void) zap_leaf_rehash_entry(l, i);
805         }
806 }
807
808 void
809 zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
810 {
811         int n = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
812             zap_leaf_phys(l)->l_hdr.lh_prefix_len;
813         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
814         zs->zs_leafs_with_2n_pointers[n]++;
815
816
817         n = zap_leaf_phys(l)->l_hdr.lh_nentries/5;
818         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
819         zs->zs_blocks_with_n5_entries[n]++;
820
821         n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
822             zap_leaf_phys(l)->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
823             (1<<FZAP_BLOCK_SHIFT(zap));
824         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
825         zs->zs_blocks_n_tenths_full[n]++;
826
827         for (int i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
828                 int nentries = 0;
829                 int chunk = zap_leaf_phys(l)->l_hash[i];
830
831                 while (chunk != CHAIN_END) {
832                         struct zap_leaf_entry *le =
833                             ZAP_LEAF_ENTRY(l, chunk);
834
835                         n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_numints) +
836                             ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_numints *
837                             le->le_value_intlen);
838                         n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
839                         zs->zs_entries_using_n_chunks[n]++;
840
841                         chunk = le->le_next;
842                         nentries++;
843                 }
844
845                 n = nentries;
846                 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
847                 zs->zs_buckets_with_n_entries[n]++;
848         }
849 }