lib/test_objpool.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 /*
   4  * Test module for lockless object pool
   5  *
   6  * Copyright: wuqiang.matt@bytedance.com
   7  */
   8
   9 #include <linux/errno.h>
  10 #include <linux/module.h>
  11 #include <linux/moduleparam.h>
  12 #include <linux/completion.h>
  13 #include <linux/kthread.h>
  14 #include <linux/slab.h>
  15 #include <linux/vmalloc.h>
  16 #include <linux/delay.h>
  17 #include <linux/hrtimer.h>
  18 #include <linux/objpool.h>
  19
  20 #define OT_NR_MAX_BULK (16)
  21
  22 /* memory usage */
  23 struct ot_mem_stat {
  24         atomic_long_t alloc;
  25         atomic_long_t free;
  26 };
  27
  28 /* object allocation results */
  29 struct ot_obj_stat {
  30         unsigned long nhits;
  31         unsigned long nmiss;
  32 };
  33
  34 /* control & results per testcase */
  35 struct ot_data {
  36         struct rw_semaphore start;
  37         struct completion wait;
  38         struct completion rcu;
  39         atomic_t nthreads ____cacheline_aligned_in_smp;
  40         atomic_t stop ____cacheline_aligned_in_smp;
  41         struct ot_mem_stat kmalloc;
  42         struct ot_mem_stat vmalloc;
  43         struct ot_obj_stat objects;
  44         u64    duration;
  45 };
  46
  47 /* testcase */
  48 struct ot_test {
  49         int async; /* synchronous or asynchronous */
  50         int mode; /* only mode 0 supported */
  51         int objsz; /* object size */
  52         int duration; /* ms */
  53         int delay; /* ms */
  54         int bulk_normal;
  55         int bulk_irq;
  56         unsigned long hrtimer; /* ms */
  57         const char *name;
  58         struct ot_data data;
  59 };
  60
  61 /* per-cpu worker */
  62 struct ot_item {
  63         struct objpool_head *pool; /* pool head */
  64         struct ot_test *test; /* test parameters */
  65
  66         void (*worker)(struct ot_item *item, int irq);
  67
  68         /* hrtimer control */
  69         ktime_t hrtcycle;
  70         struct hrtimer hrtimer;
  71
  72         int bulk[2]; /* for thread and irq */
  73         int delay;
  74         u32 niters;
  75
  76         /* summary per thread */
  77         struct ot_obj_stat stat[2]; /* thread and irq */
  78         u64 duration;
  79 };
  80
  81 /*
  82  * memory leakage checking
  83  */
  84
  85 static void *ot_kzalloc(struct ot_test *test, long size)
  86 {
  87         void *ptr = kzalloc(size, GFP_KERNEL);
  88
  89         if (ptr)
  90                 atomic_long_add(size, &test->data.kmalloc.alloc);
  91         return ptr;
  92 }
  93
  94 static void ot_kfree(struct ot_test *test, void *ptr, long size)
  95 {
  96         if (!ptr)
  97                 return;
  98         atomic_long_add(size, &test->data.kmalloc.free);
  99         kfree(ptr);
 100 }
 101
 102 static void ot_mem_report(struct ot_test *test)
 103 {
 104         long alloc, free;
 105
 106         pr_info("memory allocation summary for %s\n", test->name);
 107
 108         alloc = atomic_long_read(&test->data.kmalloc.alloc);
 109         free = atomic_long_read(&test->data.kmalloc.free);
 110         pr_info("  kmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
 111
 112         alloc = atomic_long_read(&test->data.vmalloc.alloc);
 113         free = atomic_long_read(&test->data.vmalloc.free);
 114         pr_info("  vmalloc: %lu - %lu = %lu\n", alloc, free, alloc - free);
 115 }
 116
 117 /* user object instance */
 118 struct ot_node {
 119         void *owner;
 120         unsigned long data;
 121         unsigned long refs;
 122         unsigned long payload[32];
 123 };
 124
 125 /* user objpool manager */
 126 struct ot_context {
 127         struct objpool_head pool; /* objpool head */
 128         struct ot_test *test; /* test parameters */
 129         void *ptr; /* user pool buffer */
 130         unsigned long size; /* buffer size */
 131         struct rcu_head rcu;
 132 };
 133
 134 static DEFINE_PER_CPU(struct ot_item, ot_pcup_items);
 135
 136 static int ot_init_data(struct ot_data *data)
 137 {
 138         memset(data, 0, sizeof(*data));
 139         init_rwsem(&data->start);
 140         init_completion(&data->wait);
 141         init_completion(&data->rcu);
 142         atomic_set(&data->nthreads, 1);
 143
 144         return 0;
 145 }
 146
 147 static int ot_init_node(void *nod, void *context)
 148 {
 149         struct ot_context *sop = context;
 150         struct ot_node *on = nod;
 151
 152         on->owner = &sop->pool;
 153         return 0;
 154 }
 155
 156 static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt)
 157 {
 158         struct ot_item *item = container_of(hrt, struct ot_item, hrtimer);
 159         struct ot_test *test = item->test;
 160
 161         if (atomic_read_acquire(&test->data.stop))
 162                 return HRTIMER_NORESTART;
 163
 164         /* do bulk-testings for objects pop/push */
 165         item->worker(item, 1);
 166
 167         hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle);
 168         return HRTIMER_RESTART;
 169 }
 170
 171 static void ot_start_hrtimer(struct ot_item *item)
 172 {
 173         if (!item->test->hrtimer)
 174                 return;
 175         hrtimer_start(&item->hrtimer, item->hrtcycle, HRTIMER_MODE_REL);
 176 }
 177
 178 static void ot_stop_hrtimer(struct ot_item *item)
 179 {
 180         if (!item->test->hrtimer)
 181                 return;
 182         hrtimer_cancel(&item->hrtimer);
 183 }
 184
 185 static int ot_init_hrtimer(struct ot_item *item, unsigned long hrtimer)
 186 {
 187         struct hrtimer *hrt = &item->hrtimer;
 188
 189         if (!hrtimer)
 190                 return -ENOENT;
 191
 192         item->hrtcycle = ktime_set(0, hrtimer * 1000000UL);
 193         hrtimer_init(hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 194         hrt->function = ot_hrtimer_handler;
 195         return 0;
 196 }
 197
 198 static int ot_init_cpu_item(struct ot_item *item,
 199                         struct ot_test *test,
 200                         struct objpool_head *pool,
 201                         void (*worker)(struct ot_item *, int))
 202 {
 203         memset(item, 0, sizeof(*item));
 204         item->pool = pool;
 205         item->test = test;
 206         item->worker = worker;
 207
 208         item->bulk[0] = test->bulk_normal;
 209         item->bulk[1] = test->bulk_irq;
 210         item->delay = test->delay;
 211
 212         /* initialize hrtimer */
 213         ot_init_hrtimer(item, item->test->hrtimer);
 214         return 0;
 215 }
 216
 217 static int ot_thread_worker(void *arg)
 218 {
 219         struct ot_item *item = arg;
 220         struct ot_test *test = item->test;
 221         ktime_t start;
 222
 223         atomic_inc(&test->data.nthreads);
 224         down_read(&test->data.start);
 225         up_read(&test->data.start);
 226         start = ktime_get();
 227         ot_start_hrtimer(item);
 228         do {
 229                 if (atomic_read_acquire(&test->data.stop))
 230                         break;
 231                 /* do bulk-testings for objects pop/push */
 232                 item->worker(item, 0);
 233         } while (!kthread_should_stop());
 234         ot_stop_hrtimer(item);
 235         item->duration = (u64) ktime_us_delta(ktime_get(), start);
 236         if (atomic_dec_and_test(&test->data.nthreads))
 237                 complete(&test->data.wait);
 238
 239         return 0;
 240 }
 241
 242 static void ot_perf_report(struct ot_test *test, u64 duration)
 243 {
 244         struct ot_obj_stat total, normal = {0}, irq = {0};
 245         int cpu, nthreads = 0;
 246
 247         pr_info("\n");
 248         pr_info("Testing summary for %s\n", test->name);
 249
 250         for_each_possible_cpu(cpu) {
 251                 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
 252                 if (!item->duration)
 253                         continue;
 254                 normal.nhits += item->stat[0].nhits;
 255                 normal.nmiss += item->stat[0].nmiss;
 256                 irq.nhits += item->stat[1].nhits;
 257                 irq.nmiss += item->stat[1].nmiss;
 258                 pr_info("CPU: %d  duration: %lluus\n", cpu, item->duration);
 259                 pr_info("\tthread:\t%16lu hits \t%16lu miss\n",
 260                         item->stat[0].nhits, item->stat[0].nmiss);
 261                 pr_info("\tirq:   \t%16lu hits \t%16lu miss\n",
 262                         item->stat[1].nhits, item->stat[1].nmiss);
 263                 pr_info("\ttotal: \t%16lu hits \t%16lu miss\n",
 264                         item->stat[0].nhits + item->stat[1].nhits,
 265                         item->stat[0].nmiss + item->stat[1].nmiss);
 266                 nthreads++;
 267         }
 268
 269         total.nhits = normal.nhits + irq.nhits;
 270         total.nmiss = normal.nmiss + irq.nmiss;
 271
 272         pr_info("ALL: \tnthreads: %d  duration: %lluus\n", nthreads, duration);
 273         pr_info("SUM: \t%16lu hits \t%16lu miss\n",
 274                 total.nhits, total.nmiss);
 275
 276         test->data.objects = total;
 277         test->data.duration = duration;
 278 }
 279
 280 /*
 281  * synchronous test cases for objpool manipulation
 282  */
 283
 284 /* objpool manipulation for synchronous mode (percpu objpool) */
 285 static struct ot_context *ot_init_sync_m0(struct ot_test *test)
 286 {
 287         struct ot_context *sop = NULL;
 288         int max = num_possible_cpus() << 3;
 289         gfp_t gfp = GFP_KERNEL;
 290
 291         sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
 292         if (!sop)
 293                 return NULL;
 294         sop->test = test;
 295         if (test->objsz < 512)
 296                 gfp = GFP_ATOMIC;
 297
 298         if (objpool_init(&sop->pool, max, test->objsz,
 299                          gfp, sop, ot_init_node, NULL)) {
 300                 ot_kfree(test, sop, sizeof(*sop));
 301                 return NULL;
 302         }
 303         WARN_ON(max != sop->pool.nr_objs);
 304
 305         return sop;
 306 }
 307
 308 static void ot_fini_sync(struct ot_context *sop)
 309 {
 310         objpool_fini(&sop->pool);
 311         ot_kfree(sop->test, sop, sizeof(*sop));
 312 }
 313
 314 static struct {
 315         struct ot_context * (*init)(struct ot_test *oc);
 316         void (*fini)(struct ot_context *sop);
 317 } g_ot_sync_ops[] = {
 318         {.init = ot_init_sync_m0, .fini = ot_fini_sync},
 319 };
 320
 321 /*
 322  * synchronous test cases: performance mode
 323  */
 324
 325 static void ot_bulk_sync(struct ot_item *item, int irq)
 326 {
 327         struct ot_node *nods[OT_NR_MAX_BULK];
 328         int i;
 329
 330         for (i = 0; i < item->bulk[irq]; i++)
 331                 nods[i] = objpool_pop(item->pool);
 332
 333         if (!irq && (item->delay || !(++(item->niters) & 0x7FFF)))
 334                 msleep(item->delay);
 335
 336         while (i-- > 0) {
 337                 struct ot_node *on = nods[i];
 338                 if (on) {
 339                         on->refs++;
 340                         objpool_push(on, item->pool);
 341                         item->stat[irq].nhits++;
 342                 } else {
 343                         item->stat[irq].nmiss++;
 344                 }
 345         }
 346 }
 347
 348 static int ot_start_sync(struct ot_test *test)
 349 {
 350         struct ot_context *sop;
 351         ktime_t start;
 352         u64 duration;
 353         unsigned long timeout;
 354         int cpu;
 355
 356         /* initialize objpool for syncrhonous testcase */
 357         sop = g_ot_sync_ops[test->mode].init(test);
 358         if (!sop)
 359                 return -ENOMEM;
 360
 361         /* grab rwsem to block testing threads */
 362         down_write(&test->data.start);
 363
 364         for_each_possible_cpu(cpu) {
 365                 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
 366                 struct task_struct *work;
 367
 368                 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_sync);
 369
 370                 /* skip offline cpus */
 371                 if (!cpu_online(cpu))
 372                         continue;
 373
 374                 work = kthread_create_on_node(ot_thread_worker, item,
 375                                 cpu_to_node(cpu), "ot_worker_%d", cpu);
 376                 if (IS_ERR(work)) {
 377                         pr_err("failed to create thread for cpu %d\n", cpu);
 378                 } else {
 379                         kthread_bind(work, cpu);
 380                         wake_up_process(work);
 381                 }
 382         }
 383
 384         /* wait a while to make sure all threads waiting at start line */
 385         msleep(20);
 386
 387         /* in case no threads were created: memory insufficient ? */
 388         if (atomic_dec_and_test(&test->data.nthreads))
 389                 complete(&test->data.wait);
 390
 391         // sched_set_fifo_low(current);
 392
 393         /* start objpool testing threads */
 394         start = ktime_get();
 395         up_write(&test->data.start);
 396
 397         /* yeild cpu to worker threads for duration ms */
 398         timeout = msecs_to_jiffies(test->duration);
 399         schedule_timeout_interruptible(timeout);
 400
 401         /* tell workers threads to quit */
 402         atomic_set_release(&test->data.stop, 1);
 403
 404         /* wait all workers threads finish and quit */
 405         wait_for_completion(&test->data.wait);
 406         duration = (u64) ktime_us_delta(ktime_get(), start);
 407
 408         /* cleanup objpool */
 409         g_ot_sync_ops[test->mode].fini(sop);
 410
 411         /* report testing summary and performance results */
 412         ot_perf_report(test, duration);
 413
 414         /* report memory allocation summary */
 415         ot_mem_report(test);
 416
 417         return 0;
 418 }
 419
 420 /*
 421  * asynchronous test cases: pool lifecycle controlled by refcount
 422  */
 423
 424 static void ot_fini_async_rcu(struct rcu_head *rcu)
 425 {
 426         struct ot_context *sop = container_of(rcu, struct ot_context, rcu);
 427         struct ot_test *test = sop->test;
 428
 429         /* here all cpus are aware of the stop event: test->data.stop = 1 */
 430         WARN_ON(!atomic_read_acquire(&test->data.stop));
 431
 432         objpool_fini(&sop->pool);
 433         complete(&test->data.rcu);
 434 }
 435
 436 static void ot_fini_async(struct ot_context *sop)
 437 {
 438         /* make sure the stop event is acknowledged by all cores */
 439         call_rcu(&sop->rcu, ot_fini_async_rcu);
 440 }
 441
 442 static int ot_objpool_release(struct objpool_head *head, void *context)
 443 {
 444         struct ot_context *sop = context;
 445
 446         WARN_ON(!head || !sop || head != &sop->pool);
 447
 448         /* do context cleaning if needed */
 449         if (sop)
 450                 ot_kfree(sop->test, sop, sizeof(*sop));
 451
 452         return 0;
 453 }
 454
 455 static struct ot_context *ot_init_async_m0(struct ot_test *test)
 456 {
 457         struct ot_context *sop = NULL;
 458         int max = num_possible_cpus() << 3;
 459         gfp_t gfp = GFP_KERNEL;
 460
 461         sop = (struct ot_context *)ot_kzalloc(test, sizeof(*sop));
 462         if (!sop)
 463                 return NULL;
 464         sop->test = test;
 465         if (test->objsz < 512)
 466                 gfp = GFP_ATOMIC;
 467
 468         if (objpool_init(&sop->pool, max, test->objsz, gfp, sop,
 469                          ot_init_node, ot_objpool_release)) {
 470                 ot_kfree(test, sop, sizeof(*sop));
 471                 return NULL;
 472         }
 473         WARN_ON(max != sop->pool.nr_objs);
 474
 475         return sop;
 476 }
 477
 478 static struct {
 479         struct ot_context * (*init)(struct ot_test *oc);
 480         void (*fini)(struct ot_context *sop);
 481 } g_ot_async_ops[] = {
 482         {.init = ot_init_async_m0, .fini = ot_fini_async},
 483 };
 484
 485 static void ot_nod_recycle(struct ot_node *on, struct objpool_head *pool,
 486                         int release)
 487 {
 488         struct ot_context *sop;
 489
 490         on->refs++;
 491
 492         if (!release) {
 493                 /* push object back to opjpool for reuse */
 494                 objpool_push(on, pool);
 495                 return;
 496         }
 497
 498         sop = container_of(pool, struct ot_context, pool);
 499         WARN_ON(sop != pool->context);
 500
 501         /* unref objpool with nod removed forever */
 502         objpool_drop(on, pool);
 503 }
 504
 505 static void ot_bulk_async(struct ot_item *item, int irq)
 506 {
 507         struct ot_test *test = item->test;
 508         struct ot_node *nods[OT_NR_MAX_BULK];
 509         int i, stop;
 510
 511         for (i = 0; i < item->bulk[irq]; i++)
 512                 nods[i] = objpool_pop(item->pool);
 513
 514         if (!irq) {
 515                 if (item->delay || !(++(item->niters) & 0x7FFF))
 516                         msleep(item->delay);
 517                 get_cpu();
 518         }
 519
 520         stop = atomic_read_acquire(&test->data.stop);
 521
 522         /* drop all objects and deref objpool */
 523         while (i-- > 0) {
 524                 struct ot_node *on = nods[i];
 525
 526                 if (on) {
 527                         on->refs++;
 528                         ot_nod_recycle(on, item->pool, stop);
 529                         item->stat[irq].nhits++;
 530                 } else {
 531                         item->stat[irq].nmiss++;
 532                 }
 533         }
 534
 535         if (!irq)
 536                 put_cpu();
 537 }
 538
 539 static int ot_start_async(struct ot_test *test)
 540 {
 541         struct ot_context *sop;
 542         ktime_t start;
 543         u64 duration;
 544         unsigned long timeout;
 545         int cpu;
 546
 547         /* initialize objpool for syncrhonous testcase */
 548         sop = g_ot_async_ops[test->mode].init(test);
 549         if (!sop)
 550                 return -ENOMEM;
 551
 552         /* grab rwsem to block testing threads */
 553         down_write(&test->data.start);
 554
 555         for_each_possible_cpu(cpu) {
 556                 struct ot_item *item = per_cpu_ptr(&ot_pcup_items, cpu);
 557                 struct task_struct *work;
 558
 559                 ot_init_cpu_item(item, test, &sop->pool, ot_bulk_async);
 560
 561                 /* skip offline cpus */
 562                 if (!cpu_online(cpu))
 563                         continue;
 564
 565                 work = kthread_create_on_node(ot_thread_worker, item,
 566                                 cpu_to_node(cpu), "ot_worker_%d", cpu);
 567                 if (IS_ERR(work)) {
 568                         pr_err("failed to create thread for cpu %d\n", cpu);
 569                 } else {
 570                         kthread_bind(work, cpu);
 571                         wake_up_process(work);
 572                 }
 573         }
 574
 575         /* wait a while to make sure all threads waiting at start line */
 576         msleep(20);
 577
 578         /* in case no threads were created: memory insufficient ? */
 579         if (atomic_dec_and_test(&test->data.nthreads))
 580                 complete(&test->data.wait);
 581
 582         /* start objpool testing threads */
 583         start = ktime_get();
 584         up_write(&test->data.start);
 585
 586         /* yeild cpu to worker threads for duration ms */
 587         timeout = msecs_to_jiffies(test->duration);
 588         schedule_timeout_interruptible(timeout);
 589
 590         /* tell workers threads to quit */
 591         atomic_set_release(&test->data.stop, 1);
 592
 593         /* do async-finalization */
 594         g_ot_async_ops[test->mode].fini(sop);
 595
 596         /* wait all workers threads finish and quit */
 597         wait_for_completion(&test->data.wait);
 598         duration = (u64) ktime_us_delta(ktime_get(), start);
 599
 600         /* assure rcu callback is triggered */
 601         wait_for_completion(&test->data.rcu);
 602
 603         /*
 604          * now we are sure that objpool is finalized either
 605          * by rcu callback or by worker threads
 606          */
 607
 608         /* report testing summary and performance results */
 609         ot_perf_report(test, duration);
 610
 611         /* report memory allocation summary */
 612         ot_mem_report(test);
 613
 614         return 0;
 615 }
 616
/*
 * predefined testing cases:
 *   synchronous case / overrun case / async case
 *
 * async: synchronous or asynchronous testing
 * mode: only mode 0 supported
 * objsz: object size
 * duration: int, total test time in ms
 * delay: int, delay (in ms) between each iteration
 * bulk_normal: int, repeat times for thread worker
 * bulk_irq: int, repeat times for irq consumer
 * hrtimer: unsigned long, hrtimer interval in ms
 * name: char *, tag for current test ot_item
 */
 630  */
 631
 632 #define NODE_COMPACT sizeof(struct ot_node)
 633 #define NODE_VMALLOC (512)
 634
 635 static struct ot_test g_testcases[] = {
 636
 637         /* sync & normal */
 638         {0, 0, NODE_COMPACT, 1000, 0,  1,  0,  0, "sync: percpu objpool"},
 639         {0, 0, NODE_VMALLOC, 1000, 0,  1,  0,  0, "sync: percpu objpool from vmalloc"},
 640
 641         /* sync & hrtimer */
 642         {0, 0, NODE_COMPACT, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool"},
 643         {0, 0, NODE_VMALLOC, 1000, 0,  1,  1,  4, "sync & hrtimer: percpu objpool from vmalloc"},
 644
 645         /* sync & overrun */
 646         {0, 0, NODE_COMPACT, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool"},
 647         {0, 0, NODE_VMALLOC, 1000, 0, 16,  0,  0, "sync overrun: percpu objpool from vmalloc"},
 648
 649         /* async mode */
 650         {1, 0, NODE_COMPACT, 1000, 100,  1,  0,  0, "async: percpu objpool"},
 651         {1, 0, NODE_VMALLOC, 1000, 100,  1,  0,  0, "async: percpu objpool from vmalloc"},
 652
 653         /* async + hrtimer mode */
 654         {1, 0, NODE_COMPACT, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool"},
 655         {1, 0, NODE_VMALLOC, 1000, 0,  4,  4,  4, "async & hrtimer: percpu objpool from vmalloc"},
 656 };
 657
 658 static int __init ot_mod_init(void)
 659 {
 660         int i;
 661
 662         /* perform testings */
 663         for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
 664                 ot_init_data(&g_testcases[i].data);
 665                 if (g_testcases[i].async)
 666                         ot_start_async(&g_testcases[i]);
 667                 else
 668                         ot_start_sync(&g_testcases[i]);
 669         }
 670
 671         /* show tests summary */
 672         pr_info("\n");
 673         pr_info("Summary of testcases:\n");
 674         for (i = 0; i < ARRAY_SIZE(g_testcases); i++) {
 675                 pr_info("    duration: %lluus \thits: %10lu \tmiss: %10lu \t%s\n",
 676                         g_testcases[i].data.duration, g_testcases[i].data.objects.nhits,
 677                         g_testcases[i].data.objects.nmiss, g_testcases[i].name);
 678         }
 679
 680         return -EAGAIN;
 681 }
 682
 683 static void __exit ot_mod_exit(void)
 684 {
 685 }
 686
 687 module_init(ot_mod_init);
 688 module_exit(ot_mod_exit);
 689
 690 MODULE_LICENSE("GPL");