test/interbench/interbench.c

   1 /*******************************************
   2  *
   3  * Interbench - Interactivity benchmark
   4  *
   5  * Author:  Con Kolivas <kernel@kolivas.org>
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  20  *
  21  *******************************************/
  22
  23 #define _GNU_SOURCE
  24 #define _FILE_OFFSET_BITS 64    /* Large file support */
  25 #define INTERBENCH_VERSION      "0.30"
  26
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <stdarg.h>
  30 #include <strings.h>
  31 #include <string.h>
  32 #include <unistd.h>
  33 #include <fcntl.h>
  34 #include <sched.h>
  35 #include <time.h>
  36 #include <errno.h>
  37 #include <semaphore.h>
  38 #include <pthread.h>
  39 #include <math.h>
  40 #include <fenv.h>
  41 #include <signal.h>
  42 #include <sys/utsname.h>
  43 #include <sys/time.h>
  44 #include <sys/resource.h>
  45 #include <sys/types.h>
  46 #include <sys/mman.h>
  47 #include <sys/wait.h>
  48 #include "interbench.h"
  49
  50 #define MAX_UNAME_LENGTH        100
  51 #define MAX_LOG_LENGTH          ((MAX_UNAME_LENGTH) + 4)
  52 #define MIN_BLK_SIZE            1024
  53 #define DEFAULT_RESERVE         64
  54 #define MB                      (1024 * 1024)   /* 2^20 bytes */
  55 #define KB                      1024
  56 #define MAX_MEM_IN_MB           (1024 * 64)     /* 64 GB */
  57
  58 struct user_data {
  59         unsigned long loops_per_ms;
  60         unsigned long ram, swap;
  61         int duration;
  62         int do_rt;
  63         int bench_nice;
  64         int load_nice;
  65         unsigned long custom_run;
  66         unsigned long custom_interval;
  67         unsigned long cpu_load;
  68         char logfilename[MAX_LOG_LENGTH];
  69         int log;
  70         char unamer[MAX_UNAME_LENGTH];
  71         char datestamp[13];
  72         FILE *logfile;
  73 } ud = {
  74         .duration = 30,
  75         .cpu_load = 4,
  76         .log = 1,
  77 };
  78
  /*
   * Pipes between main and the load and bench processes: main-to-load,
   * load-to-main, main-to-bench and bench-to-main.
   */
  static int m2l[2];
  static int l2m[2];
  static int m2b[2];
  static int b2m[2];

  /* Which member of benchmarks is used when not benchmarking */
  #define NOT_BENCHING    (THREADS)
  /* Index of the custom entry, which is kept last in threadlist */
  #define CUSTOM          (THREADS - 1)
  85
  86 /*
  87  * To add another load or a benchmark you need to increment the value of
  88  * THREADS, add a function prototype for your function and add an entry to
  89  * the threadlist. To specify whether the function is a benchmark or a load
  90  * set the benchmark and/or load flag as appropriate. The basic requirements
  91  * of a new load can be seen by using emulate_none as a template.
  92  */
  93
  94 void emulate_none(struct thread *th);
  95 void emulate_audio(struct thread *th);
  96 void emulate_video(struct thread *th);
  97 void emulate_x(struct thread *th);
  98 void emulate_game(struct thread *th);
  99 void emulate_burn(struct thread *th);
 100 void emulate_write(struct thread *th);
 101 void emulate_read(struct thread *th);
 102 void emulate_ring(struct thread *th);
 103 void emulate_compile(struct thread *th);
 104 void emulate_memload(struct thread *th);
 105 void emulate_hackbench(struct thread *th);
 106 void emulate_custom(struct thread *th);
 107
 108 struct thread threadlist[THREADS] = {
 109         {.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
 110         {.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
 111         {.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
 112         {.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
 113         {.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
 114         {.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
 115         {.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
 116         {.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
 117         {.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0},        /* No useful data from this */
 118         {.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
 119         {.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
 120         {.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0},   /* This is causing signal headaches */
 121         {.label = "Custom", .name = emulate_custom},    /* Leave custom as last entry */
 122 };
 123
 124 void init_sem(sem_t *sem);
 125 void init_all_sems(struct sems *s);
 126 void initialise_thread(int i);
 127 void start_thread(struct thread *th);
 128 void stop_thread(struct thread *th);
 129
 /*
  * Fatal error exit: emit a newline on stderr, let perror() describe the
  * current errno prefixed by name, then terminate with exit status 1.
  * Never returns.
  */
 void terminal_error(const char *name)
 {
         fputs("\n", stderr);
         perror(name);
         exit(1);
 }
 136
 /*
  * Fatal error exit for code that still holds an open stream: close fp
  * first (a failing close is itself reported as the fatal error), then
  * report name through terminal_error().
  */
 void terminal_fileopen_error(FILE *fp, char *name)
 {
         int closed = fclose(fp);

         if (closed == -1)
                 terminal_error("fclose");
         terminal_error(name);
 }
 143
 144 unsigned long long get_nsecs(struct timespec *myts)
 145 {
 146         if (clock_gettime(CLOCK_REALTIME, myts))
 147                 terminal_error("clock_gettime");
 148         return (myts->tv_sec * 1000000000 + myts->tv_nsec );
 149 }
 150
 151 unsigned long get_usecs(struct timespec *myts)
 152 {
 153         if (clock_gettime(CLOCK_REALTIME, myts))
 154                 terminal_error("clock_gettime");
 155         return (myts->tv_sec * 1000000 + myts->tv_nsec / 1000 );
 156 }
 157
 /*
  * Switch the calling process to SCHED_FIFO at priority prio. Lacking the
  * permission to do so (EPERM) is silently tolerated; any other failure is
  * fatal.
  */
 void set_fifo(int prio)
 {
         struct sched_param param;

         memset(&param, 0, sizeof(param));
         param.sched_priority = prio;
         if (sched_setscheduler(0, SCHED_FIFO, &param) != -1)
                 return;
         if (errno != EPERM)
                 terminal_error("sched_setscheduler");
 }
 169
 /*
  * Ask for all current and future pages of the process to be locked in
  * memory. The result is deliberately ignored: it is not critical if the
  * lock is refused.
  */
 void set_mlock(void)
 {
         mlockall(MCL_CURRENT | MCL_FUTURE);
 }
 177
 /* Undo set_mlock(): unlock every page of the process; failure is fatal. */
 void set_munlock(void)
 {
         int rc = munlockall();

         if (rc == -1)
                 terminal_error("munlockall");
 }
 183
 /*
  * Switch thread pthread to SCHED_FIFO at priority prio.
  *
  * pthread_setschedparam() reports failure by returning an error number;
  * it never returns -1 and does not set errno. The result is therefore
  * tested against zero and copied into errno so that terminal_error()
  * prints the real cause. As in set_fifo(), a missing permission (EPERM)
  * is tolerated rather than treated as fatal.
  */
 void set_thread_fifo(pthread_t pthread, int prio)
 {
         struct sched_param sp;
         int err;

         memset(&sp, 0, sizeof(sp));
         sp.sched_priority = prio;
         err = pthread_setschedparam(pthread, SCHED_FIFO, &sp);
         if (err && err != EPERM) {
                 errno = err;
                 terminal_error("pthread_setschedparam");
         }
 }
 192
 /*
  * Put the calling process back on SCHED_OTHER with priority 0. A failure
  * only produces a warning on stderr.
  */
 void set_normal(void)
 {
         struct sched_param param;
         int rc;

         memset(&param, 0, sizeof(param));
         param.sched_priority = 0;
         rc = sched_setscheduler(0, SCHED_OTHER, &param);
         if (rc == -1)
                 fprintf(stderr, "Weird, could not unset RT scheduling!\n");
 }
 202
 /* Set the nice value of the calling process to prio; failure is fatal. */
 void set_nice(int prio)
 {
         int rc = setpriority(PRIO_PROCESS, 0, prio);

         if (rc == -1)
                 terminal_error("setpriority");
 }
 208
 /*
  * Probe whether this process can use SCHED_FIFO: try to switch to it at
  * priority 99 and read the policy back. Scheduling is returned to normal
  * before returning.
  *
  * Returns 1 when SCHED_FIFO was obtained, 0 otherwise. A failure other
  * than EPERM is fatal.
  */
 int test_fifo(void)
 {
         struct sched_param param;
         int usable = 0;

         memset(&param, 0, sizeof(param));
         param.sched_priority = 99;
         if (sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
                 if (errno != EPERM)
                         terminal_error("sched_setscheduler");
         } else if (sched_getscheduler(0) == SCHED_FIFO) {
                 usable = 1;
         }
         set_normal();
         return usable;
 }
 227
 /*
  * Put thread pthread back on SCHED_OTHER with priority 0.
  *
  * pthread_setschedparam() returns an error number on failure (never -1)
  * and leaves errno alone, so the result is tested against zero and copied
  * into errno before terminal_error() reports it.
  */
 void set_thread_normal(pthread_t pthread)
 {
         struct sched_param sp;
         int err;

         memset(&sp, 0, sizeof(sp));
         sp.sched_priority = 0;
         err = pthread_setschedparam(pthread, SCHED_OTHER, &sp);
         if (err) {
                 errno = err;
                 terminal_error("pthread_setschedparam");
         }
 }
 236
 /*
  * Flush every open stdio output stream, then call sync() three times in a
  * row. A failing fflush() is fatal.
  */
 void sync_flush(void)
 {
         int pass;

         if (fflush(NULL) == EOF)
                 terminal_error("fflush");
         for (pass = 0; pass < 3; pass++)
                 sync();
 }
 245
 246 unsigned long compute_allocable_mem(void)
 247 {
 248         unsigned long total = ud.ram + ud.swap;
 249         unsigned long usage = ud.ram * 110 / 100 ;
 250
 251         /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
 252         if (total - DEFAULT_RESERVE < usage)
 253                 usage = total - DEFAULT_RESERVE;
 254         usage /= 1024;  /* to megabytes */
 255         if (usage > 2930)
 256                 usage = 2930;
 257         return usage;
 258 }
 259
 /*
  * Spin for the given number of loop iterations to consume cpu time.
  *
  * The empty asm statement with a "memory" clobber is a compiler barrier:
  * it stops the optimiser from deleting the otherwise empty loop, which
  * would make it useless for emulating a fixed cpu load. The __asm__ /
  * __volatile__ spellings are used because the plain asm keyword is not
  * recognised when compiling in a strict ISO mode such as -std=c11.
  */
 void burn_loops(unsigned long loops)
 {
         unsigned long i;

         for (i = 0 ; i < loops ; i++)
                 __asm__ __volatile__("" : : : "memory");
 }
 272
 273 /* Use this many usecs of cpu time */
 274 void burn_usecs(unsigned long usecs)
 275 {
 276         unsigned long ms_loops;
 277
 278         ms_loops = ud.loops_per_ms / 1000 * usecs;
 279         burn_loops(ms_loops);
 280 }
 281
 /*
  * Sleep for usecs microseconds using nanosleep(). When the sleep is
  * interrupted by a signal it is resumed for the remaining time; any other
  * nanosleep() failure is fatal.
  */
 void microsleep(unsigned long long usecs)
 {
         struct timespec want, left;

         left.tv_sec = 0;
         left.tv_nsec = 0;

         want.tv_sec = usecs / 1000000;
         want.tv_nsec = (usecs - (want.tv_sec * 1000000)) * 1000;

         while (nanosleep(&want, &left) == -1) {
                 if (errno != EINTR) {
                         terminal_error("nanosleep");
                         return;
                 }
                 /* Interrupted with nothing left to sleep: we are done */
                 if (!left.tv_sec && !left.tv_nsec)
                         return;
                 want.tv_sec = left.tv_sec;
                 want.tv_nsec = left.tv_nsec;
         }
 }
 305
 /*
  * Post semaphore s. The semaphore calls are not expected to fail, but they
  * can return -1, so that has to be handled: an interrupted call (EINTR)
  * is retried and anything else is fatal.
  */
 inline void post_sem(sem_t *s)
 {
         for (;;) {
                 if (sem_post(s) != -1)
                         return;
                 if (errno != EINTR)
                         break;
         }
         terminal_error("sem_post");
 }
 319
 /*
  * Block until semaphore s can be taken. A wait interrupted by a signal
  * (EINTR) is restarted; any other failure is fatal.
  */
 inline void wait_sem(sem_t *s)
 {
         for (;;) {
                 if (sem_wait(s) != -1)
                         return;
                 if (errno != EINTR)
                         break;
         }
         terminal_error("sem_wait");
 }
 329
 /*
  * Try to take semaphore s without blocking.
  *
  * Returns the sem_trywait() result: 0 when the semaphore was taken, -1
  * when it was not available (EAGAIN). An interrupted call is retried and
  * any other error is fatal.
  */
 inline int trywait_sem(sem_t *s)
 {
         int ret;

         do {
                 ret = sem_trywait(s);
         } while (ret == -1 && errno == EINTR);

         if (ret == -1 && errno != EAGAIN)
                 terminal_error("sem_trywait");
         return ret;
 }
 343
 /*
  * read() wrapper that restarts the call when it is interrupted by a
  * signal and treats every other error as fatal. Returns the number of
  * bytes read.
  */
 inline ssize_t Read(int fd, void *buf, size_t count)
 {
         ssize_t got;

         do {
                 got = read(fd, buf, count);
         } while (got == -1 && errno == EINTR);

         if (got == -1)
                 terminal_error("read");
         return got;
 }
 357
 /*
  * write() wrapper that restarts the call when it is interrupted by a
  * signal and treats every other error as fatal. Returns the number of
  * bytes written.
  *
  * The data is taken from buf itself. Passing &buf would send the bytes of
  * the pointer variable (and whatever follows it on the stack) instead of
  * the caller's data.
  */
 inline ssize_t Write(int fd, const void *buf, size_t count)
 {
         ssize_t retval;

 retry:
         retval = write(fd, buf, count);
         if (retval == -1) {
                 if (errno == EINTR)
                         goto retry;
                 terminal_error("write");
         }
         return retval;
 }
 371
 372 unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
 373         unsigned long interval_usecs, unsigned long long deadline)
 374 {
 375         unsigned long long latency, missed_latency;
 376         unsigned long long current_time;
 377         struct tk_thread *tk;
 378         struct data_table *tb;
 379         struct timespec myts;
 380
 381         latency = 0;
 382         tb = th->dt;
 383         tk = &th->tkthread;
 384
 385         current_time = get_usecs(&myts);
 386         if (current_time > deadline + tk->slept_interval)
 387                 latency = current_time - deadline- tk->slept_interval;
 388
 389         /* calculate the latency for missed frames */
 390         missed_latency = 0;
 391
 392         current_time = get_usecs(&myts);
 393         if (interval_usecs && current_time > deadline + interval_usecs) {
 394                 /* We missed the deadline even before we consumed cpu */
 395                 unsigned long intervals;
 396
 397                 deadline += interval_usecs;
 398                 intervals = (current_time - deadline) /
 399                         interval_usecs + 1;
 400
 401                 tb->missed_deadlines += intervals;
 402                 missed_latency = intervals * interval_usecs;
 403                 deadline += intervals * interval_usecs;
 404                 tb->missed_burns += intervals;
 405                 goto bypass_burn;
 406         }
 407
 408         burn_usecs(run_usecs);
 409         current_time = get_usecs(&myts);
 410         tb->achieved_burns++;
 411
 412         /*
 413          * If we meet the deadline we move the deadline forward, otherwise
 414          * we consider it a missed deadline and dropped frame etc.
 415          */
 416         deadline += interval_usecs;
 417         if (deadline >= current_time) {
 418                 tb->deadlines_met++;
 419         } else {
 420                 if (interval_usecs) {
 421                         unsigned long intervals = (current_time - deadline) /
 422                                 interval_usecs + 1;
 423
 424                         tb->missed_deadlines += intervals;
 425                         missed_latency = intervals * interval_usecs;
 426                         deadline += intervals * interval_usecs;
 427                         if (intervals > 1)
 428                                 tb->missed_burns += intervals;
 429                 } else {
 430                         deadline = current_time;
 431                         goto out_nosleep;
 432                 }
 433         }
 434 bypass_burn:
 435         tk->sleep_interval = deadline - current_time;
 436
 437         post_sem(&tk->sem.start);
 438         wait_sem(&tk->sem.complete);
 439 out_nosleep:
 440         /*
 441          * Must add missed_latency to total here as this function may not be
 442          * called again and the missed latency can be lost
 443          */
 444         latency += missed_latency;
 445         if (latency > tb->max_latency)
 446                 tb->max_latency = latency;
 447         tb->total_latency += latency;
 448         tb->sum_latency_squared += latency * latency;
 449         tb->nr_samples++;
 450
 451         return deadline;
 452 }
 453
 454 void initialise_thread_data(struct data_table *tb)
 455 {
 456         tb->max_latency =
 457                 tb->total_latency =
 458                 tb->sum_latency_squared =
 459                 tb->deadlines_met =
 460                 tb->missed_deadlines =
 461                 tb->missed_burns =
 462                 tb->nr_samples = 0;
 463 }
 464
 /*
  * pthread_create() wrapper that treats failure as fatal.
  *
  * pthread_create() returns the error number instead of setting errno, so
  * it is copied into errno first; otherwise the perror() call inside
  * terminal_error() would describe some unrelated earlier error.
  */
 void create_pthread(pthread_t  * thread, pthread_attr_t * attr,
         void * (*start_routine)(void *), void *arg)
 {
         int err = pthread_create(thread, attr, start_routine, arg);

         if (err) {
                 errno = err;
                 terminal_error("pthread_create");
         }
 }
 471
 /*
  * pthread_join() wrapper that treats failure as fatal.
  *
  * pthread_join() returns the error number instead of setting errno, so it
  * is copied into errno first to make terminal_error() report the real
  * cause.
  */
 void join_pthread(pthread_t th, void **thread_return)
 {
         int err = pthread_join(th, thread_return);

         if (err) {
                 errno = err;
                 terminal_error("pthread_join");
         }
 }
 477
 478 void emulate_none(struct thread *th)
 479 {
 480         sem_t *s = &th->sem.stop;
 481         wait_sem(s);
 482 }
 483
 484 #define AUDIO_INTERVAL  (50000)
 485 #define AUDIO_RUN       (AUDIO_INTERVAL / 20)
 486 /* We emulate audio by using 5% cpu and waking every 50ms */
 487 void emulate_audio(struct thread *th)
 488 {
 489         unsigned long long deadline;
 490         sem_t *s = &th->sem.stop;
 491         struct timespec myts;
 492
 493         th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
 494         deadline = get_usecs(&myts);
 495
 496         while (1) {
 497                 deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
 498                         deadline);
 499                 if (!trywait_sem(s))
 500                         return;
 501         }
 502 }
 503
 504 /* We emulate video by using 40% cpu and waking for 60fps */
 505 #define VIDEO_INTERVAL  (1000000 / 60)
 506 #define VIDEO_RUN       (VIDEO_INTERVAL * 40 / 100)
 507 void emulate_video(struct thread *th)
 508 {
 509         unsigned long long deadline;
 510         sem_t *s = &th->sem.stop;
 511         struct timespec myts;
 512
 513         th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
 514         deadline = get_usecs(&myts);
 515
 516         while (1) {
 517                 deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
 518                         deadline);
 519                 if (!trywait_sem(s))
 520                         return;
 521         }
 522 }
 523
 524 /*
 525  * We emulate X by running for a variable percentage of cpu from 0-100%
 526  * in 1ms chunks.
 527  */
 528 void emulate_x(struct thread *th)
 529 {
 530         unsigned long long deadline;
 531         sem_t *s = &th->sem.stop;
 532         struct timespec myts;
 533
 534         th->decasecond_deadlines = 100;
 535         deadline = get_usecs(&myts);
 536
 537         while (1) {
 538                 int i, j;
 539                 for (i = 0 ; i <= 100 ; i++) {
 540                         j = 100 - i;
 541                         deadline = periodic_schedule(th, i * 1000, j * 1000,
 542                                 deadline);
 543                         deadline += i * 1000;
 544                         if (!trywait_sem(s))
 545                                 return;
 546                 }
 547         }
 548 }
 549
 550 /*
 551  * We emulate gaming by using 100% cpu and seeing how many frames (jobs
 552  * completed) we can do in that time. Deadlines are meaningless with
 553  * unlocked frame rates. We do not use periodic schedule because for
 554  * this load because this never wants to sleep.
 555  */
 556 #define GAME_INTERVAL   (100000)
 557 #define GAME_RUN        (GAME_INTERVAL)
 558 void emulate_game(struct thread *th)
 559 {
 560         unsigned long long deadline, current_time, latency;
 561         sem_t *s = &th->sem.stop;
 562         struct timespec myts;
 563         struct data_table *tb;
 564
 565         tb = th->dt;
 566         th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;
 567
 568         while (1) {
 569                 deadline = get_usecs(&myts) + GAME_INTERVAL;
 570                 burn_usecs(GAME_RUN);
 571                 current_time = get_usecs(&myts);
 572                 /* use usecs instead of simple count for game burn statistics */
 573                 tb->achieved_burns += GAME_RUN;
 574                 if (current_time > deadline) {
 575                         latency = current_time - deadline;
 576                         tb->missed_burns += latency;
 577                 } else
 578                         latency = 0;
 579                 if (latency > tb->max_latency)
 580                         tb->max_latency = latency;
 581                 tb->total_latency += latency;
 582                 tb->sum_latency_squared += latency * latency;
 583                 tb->nr_samples++;
 584                 if (!trywait_sem(s))
 585                         return;
 586         }
 587 }
 588
 589 void *burn_thread(void *t)
 590 {
 591         struct thread *th;
 592         sem_t *s;
 593         long i = (long)t;
 594
 595         th = &threadlist[i];
 596         s = &th->sem.stopchild;
 597
 598         while (1) {
 599                 burn_loops(ud.loops_per_ms);
 600                 if (!trywait_sem(s)) {
 601                         post_sem(s);
 602                         break;
 603                 }
 604         }
 605         return NULL;
 606 }
 607
 608 /* Have ud.cpu_load threads burn cpu continuously */
 609 void emulate_burn(struct thread *th)
 610 {
 611         sem_t *s = &th->sem.stop;
 612         unsigned long i;
 613         long t;
 614         pthread_t burnthreads[ud.cpu_load];
 615
 616         t = th->threadno;
 617         for (i = 0 ; i < ud.cpu_load ; i++)
 618                 create_pthread(&burnthreads[i], NULL, burn_thread,
 619                         (void*)(long) t);
 620         wait_sem(s);
 621         post_sem(&th->sem.stopchild);
 622         for (i = 0 ; i < ud.cpu_load ; i++)
 623                 join_pthread(burnthreads[i], NULL);
 624 }
 625
 626 /* Write a file the size of ram continuously */
 627 void emulate_write(struct thread *th)
 628 {
 629         sem_t *s = &th->sem.stop;
 630         FILE *fp;
 631         char *name = "interbench.write";
 632         void *buf = NULL;
 633         struct stat statbuf;
 634         unsigned long mem;
 635
 636         if (!(fp = fopen(name, "w")))
 637                 terminal_error("fopen");
 638         if (stat(name, &statbuf) == -1)
 639                 terminal_fileopen_error(fp, "stat");
 640         if (statbuf.st_blksize < MIN_BLK_SIZE)
 641                 statbuf.st_blksize = MIN_BLK_SIZE;
 642         mem = ud.ram / (statbuf.st_blksize / 1024);     /* kilobytes to blocks */
 643         if (!(buf = calloc(1, statbuf.st_blksize)))
 644                 terminal_fileopen_error(fp, "calloc");
 645         if (fclose(fp) == -1)
 646                 terminal_error("fclose");
 647
 648         while (1) {
 649                 unsigned int i;
 650
 651                 if (!(fp = fopen(name, "w")))
 652                         terminal_error("fopen");
 653                 if (stat(name, &statbuf) == -1)
 654                         terminal_fileopen_error(fp, "stat");
 655                 for (i = 0 ; i < mem; i++) {
 656                         if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
 657                                 terminal_fileopen_error(fp, "fwrite");
 658                         if (!trywait_sem(s))
 659                                 goto out;
 660                 }
 661                 if (fclose(fp) == -1)
 662                         terminal_error("fclose");
 663         }
 664
 665 out:
 666         if (fclose(fp) == -1)
 667                 terminal_error("fclose");
 668         if (remove(name) == -1)
 669                 terminal_error("remove");
 670         sync_flush();
 671 }
 672
 673 /* Read a file the size of ram continuously */
 674 void emulate_read(struct thread *th)
 675 {
 676         sem_t *s = &th->sem.stop;
 677         char *name = "interbench.read";
 678         void *buf = NULL;
 679         struct stat statbuf;
 680         unsigned long bsize;
 681         int tmp;
 682
 683         if ((tmp = open(name, O_RDONLY)) == -1)
 684                 terminal_error("open");
 685         if (stat(name, &statbuf) == -1)
 686                 terminal_error("stat");
 687         bsize = statbuf.st_blksize;
 688         if (!(buf = malloc(bsize)))
 689                 terminal_error("malloc");
 690
 691         while (1) {
 692                 int rd;
 693
 694                 /*
 695                  * We have to read the whole file before quitting the load
 696                  * to prevent the data being cached for the next read. This
 697                  * is also the reason the file is the size of physical ram.
 698                  */
 699                 while ((rd = Read(tmp , buf, bsize)) > 0);
 700                 if(!trywait_sem(s))
 701                         return;
 702                 if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
 703                         terminal_error("lseek");
 704         }
 705 }
 706
 707 #define RINGTHREADS     4
 708
 709 struct thread ringthreads[RINGTHREADS];
 710
 711 void *ring_thread(void *t)
 712 {
 713         struct thread *th;
 714         struct sems *s;
 715         int i, post_to;
 716
 717         i = (long)t;
 718         th = &ringthreads[i];
 719         s = &th->sem;
 720         post_to = i + 1;
 721         if (post_to == RINGTHREADS)
 722                 post_to = 0;
 723         if (i == 0)
 724                 post_sem(&s->ready);
 725
 726         while (1) {
 727                 wait_sem(&s->start);
 728                 post_sem(&ringthreads[post_to].sem.start);
 729                 if (!trywait_sem(&s->stop))
 730                         goto out;
 731         }
 732 out:
 733         post_sem(&ringthreads[post_to].sem.start);
 734         post_sem(&s->complete);
 735         return NULL;
 736 }
 737
 738 /* Create a ring of 4 processes that wake each other up in a circle */
 739 void emulate_ring(struct thread *th)
 740 {
 741         sem_t *s = &th->sem.stop;
 742         int i;
 743
 744         for (i = 0 ; i < RINGTHREADS ; i++) {
 745                 init_all_sems(&ringthreads[i].sem);
 746                 create_pthread(&ringthreads[i].pthread, NULL,
 747                         ring_thread, (void*)(long) i);
 748         }
 749
 750         wait_sem(&ringthreads[0].sem.ready);
 751         post_sem(&ringthreads[0].sem.start);
 752         wait_sem(s);
 753         for (i = 0 ; i < RINGTHREADS ; i++)
 754                 post_sem(&ringthreads[i].sem.stop);
 755         for (i = 0 ; i < RINGTHREADS ; i++) {
 756                 wait_sem(&ringthreads[i].sem.complete);
 757                 join_pthread(ringthreads[i].pthread, NULL);
 758         }
 759 }
 760
 761 /* We emulate a compile by running burn, write and read threads simultaneously */
 762 void emulate_compile(struct thread *th)
 763 {
 764         sem_t *s = &th->sem.stop;
 765         unsigned long i, threads[3];
 766
 767         for (i = 0 ; i < THREADS ; i++) {
 768                 if (threadlist[i].label == "Burn")
 769                         threads[0] = i;
 770                 if (threadlist[i].label == "Write")
 771                         threads[1] = i;
 772                 if (threadlist[i].label == "Read")
 773                         threads[2] = i;
 774         }
 775         for (i = 0 ; i < 3 ; i++) {
 776                 if (!threads[i]) {
 777                         fprintf(stderr, "Can't find all threads for compile load\n");
 778                         exit(1);
 779                 }
 780         }
 781         for (i = 0 ; i < 3 ; i++) {
 782                 initialise_thread(threads[i]);
 783                 start_thread(&threadlist[threads[i]]);
 784         }
 785         wait_sem(s);
 786         for (i = 0 ; i < 3 ; i++)
 787                 stop_thread(&threadlist[threads[i]]);
 788 }
 789
 790 int *grab_and_touch (char *block[], int i)
 791 {
 792         block[i] = (char *) malloc(MB);
 793         if (!block[i])
 794                 return NULL;
 795         return (memset(block[i], 1, MB));
 796 }
 797
 798 /* We emulate a memory load by allocating and torturing 110% of available ram */
 799 void emulate_memload(struct thread *th)
 800 {
 801         sem_t *s = &th->sem.stop;
 802         unsigned long touchable_mem, i;
 803         char *mem_block[MAX_MEM_IN_MB];
 804         void *success;
 805
 806         touchable_mem = compute_allocable_mem();
 807         /* loop until we're killed, frobbing memory in various perverted ways */
 808         while (1) {
 809                 for (i = 0;  i < touchable_mem; i++) {
 810                         success = grab_and_touch(mem_block, i);
 811                         if (!success) {
 812                                 touchable_mem = i-1;
 813                                 break;
 814                         }
 815                 }
 816                 if (!trywait_sem(s))
 817                         goto out_freemem;
 818                 for (i = 0;  i < touchable_mem; i++) {
 819                         memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
 820                                 touchable_mem], MB);
 821                         if (!trywait_sem(s))
 822                                 goto out_freemem;
 823                 }
 824                 for (i = 0; i < touchable_mem; i++) {
 825                         free(mem_block[i]);
 826                 }
 827                 if (!trywait_sem(s))
 828                         goto out;
 829         }
 830 out_freemem:
 831         for (i = 0; i < touchable_mem; i++)
 832                 free(mem_block[i]);
 833 out:
 834         return;
 835 }
 836
 837 struct thread hackthread;
 838
 839 void emulate_hackbench(struct thread *th)
 840 {
 841         sem_t *s = &th->sem.stop;
 842
 843         init_all_sems(&hackthread.sem);
 844         create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);
 845
 846         wait_sem(s);
 847
 848         post_sem(&hackthread.sem.stop);
 849         wait_sem(&hackthread.sem.complete);
 850
 851         join_pthread(hackthread.pthread, NULL);
 852 }
 853
 854 #define CUSTOM_INTERVAL (ud.custom_interval)
 855 #define CUSTOM_RUN      (ud.custom_run)
 856 void emulate_custom(struct thread *th)
 857 {
 858         unsigned long long deadline;
 859         sem_t *s = &th->sem.stop;
 860         struct timespec myts;
 861
 862         th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
 863         deadline = get_usecs(&myts);
 864
 865         while (1) {
 866                 deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
 867                         deadline);
 868                 if (!trywait_sem(s))
 869                         return;
 870         }
 871 }
 872
 873 void *timekeeping_thread(void *t)
 874 {
 875         struct thread *th;
 876         struct tk_thread *tk;
 877         struct sems *s;
 878         struct timespec myts;
 879         long i = (long)t;
 880
 881         th = &threadlist[i];
 882         tk = &th->tkthread;
 883         s = &th->tkthread.sem;
 884         /*
 885          * If this timekeeping thread is that of a benchmarked thread we run
 886          * even higher priority than the benched thread is if running real
 887          * time. Otherwise, the load timekeeping thread, which does not need
 888          * accurate accounting remains SCHED_NORMAL;
 889          */
 890         if (th->dt != &th->benchmarks[NOT_BENCHING])
 891                 set_fifo(96);
 892         /* These values must be changed at the appropriate places or race */
 893         tk->sleep_interval = tk->slept_interval = 0;
 894         post_sem(&s->ready);
 895
 896         while (1) {
 897                 unsigned long start_time, now;
 898
 899                 if (!trywait_sem(&s->stop))
 900                         goto out;
 901                 wait_sem(&s->start);
 902                 tk->slept_interval = 0;
 903                 start_time = get_usecs(&myts);
 904                 if (!trywait_sem(&s->stop))
 905                         goto out;
 906                 if (tk->sleep_interval) {
 907                         unsigned long diff = 0;
 908                         microsleep(tk->sleep_interval);
 909                         now = get_usecs(&myts);
 910                         /* now should always be > start_time but... */
 911                         if (now > start_time) {
 912                                 diff = now - start_time;
 913                                 if (diff > tk->sleep_interval)
 914                                         tk->slept_interval = diff -
 915                                                 tk->sleep_interval;
 916                         }
 917                 }
 918                 tk->sleep_interval = 0;
 919                 post_sem(&s->complete);
 920         }
 921 out:
 922         return NULL;
 923 }
 924
 925 /*
 926  * All the sleep functions such as nanosleep can only guarantee that they
 927  * sleep for _at least_ the time requested. We work around this by having
 928  * a high priority real time thread that accounts for the extra time slept
 929  * in nanosleep. This allows wakeup latency of the tested thread to be
 930  * accurate and reflect true scheduling delays.
 931  */
 932 void *emulation_thread(void *t)
 933 {
 934         struct thread *th;
 935         struct tk_thread *tk;
 936         struct sems *s, *tks;
 937         long i = (long)t;
 938
 939         th = &threadlist[i];
 940         tk = &th->tkthread;
 941         s = &th->sem;
 942         tks = &tk->sem;
 943         init_all_sems(tks);
 944
 945         /* Start the timekeeping thread */
 946         create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
 947                 (void*)(long) i);
 948         /* Wait for timekeeping thread to be ready */
 949         wait_sem(&tks->ready);
 950
 951         /* Tell main we're ready to start*/
 952         post_sem(&s->ready);
 953
 954         /* Wait for signal from main to start thread */
 955         wait_sem(&s->start);
 956
 957         /* Start the actual function being benched/or running as load */
 958         th->name(th);
 959
 960         /* Stop the timekeeping thread */
 961         post_sem(&tks->stop);
 962         post_sem(&tks->start);
 963         join_pthread(th->tk_pthread, NULL);
 964
 965         /* Tell main we've finished */
 966         post_sem(&s->complete);
 967         return NULL;
 968 }
 969
 970 /*
 971  * In an unoptimised loop we try to benchmark how many meaningless loops
 972  * per second we can perform on this hardware to fairly accurately
 973  * reproduce certain percentage cpu usage
 974  */
 975 void calibrate_loop(void)
 976 {
 977         unsigned long long start_time, loops_per_msec, run_time = 0;
 978         unsigned long loops;
 979         struct timespec myts;
 980
 981         loops_per_msec = 100000;
 982 redo:
 983         /* Calibrate to within 1% accuracy */
 984         while (run_time > 1010000 || run_time < 990000) {
 985                 loops = loops_per_msec;
 986                 start_time = get_nsecs(&myts);
 987                 burn_loops(loops);
 988                 run_time = get_nsecs(&myts) - start_time;
 989                 loops_per_msec = (1000000 * loops_per_msec / run_time ? :
 990                         loops_per_msec);
 991         }
 992
 993         /* Rechecking after a pause increases reproducibility */
 994         sleep(1);
 995         loops = loops_per_msec;
 996         start_time = get_nsecs(&myts);
 997         burn_loops(loops);
 998         run_time = get_nsecs(&myts) - start_time;
 999
1000         /* Tolerate 5% difference on checking */
1001         if (run_time > 1050000 || run_time < 950000)
1002                 goto redo;
1003
1004         ud.loops_per_ms = loops_per_msec;
1005 }
1006
1007 void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
1008
1009 /* Output to console +/- logfile */
1010 void log_output(const char *format, ...)
1011 {
1012         va_list ap;
1013
1014         va_start(ap, format);
1015         if (vprintf(format, ap) == -1)
1016                 terminal_error("vprintf");
1017         va_end(ap);
1018         if (ud.log) {
1019                 va_start(ap, format);
1020                 if (vfprintf(ud.logfile, format, ap) == -1)
1021                         terminal_error("vpfrintf");
1022                 va_end(ap);
1023         }
1024         fflush(NULL);
1025 }
1026
1027 /* Calculate statistics and output them */
1028 void show_latencies(struct thread *th)
1029 {
1030         struct data_table *tbj;
1031         struct tk_thread *tk;
1032         double average_latency, deadlines_met, samples_met, sd, max_latency;
1033         long double variance = 0;
1034
1035         tbj = th->dt;
1036         tk = &th->tkthread;
1037
1038         if (tbj->nr_samples > 1) {
1039                 average_latency = tbj->total_latency / tbj->nr_samples;
1040                 variance = (tbj->sum_latency_squared - (average_latency *
1041                         average_latency) / tbj->nr_samples) / (tbj->nr_samples - 1);
1042                 sd = sqrtl(variance);
1043         } else {
1044                 average_latency = tbj->total_latency;
1045                 sd = 0.0;
1046         }
1047
1048         /*
1049          * Landing on the boundary of a deadline can make loaded runs appear
1050          * to do more work than unloaded due to tiny duration differences.
1051          */
1052         if (tbj->achieved_burns > 0)
1053                 samples_met = (double)tbj->achieved_burns /
1054                     (double)(tbj->achieved_burns + tbj->missed_burns) * 100;
1055         else
1056                 samples_met = 0.0;
1057         max_latency = tbj->max_latency;
1058         /* When benchmarking rt we represent the data in us */
1059         if (!ud.do_rt) {
1060                 average_latency /= 1000;
1061                 sd /= 1000;
1062                 max_latency /= 1000;
1063         }
1064         if (tbj->deadlines_met == 0)
1065                 deadlines_met = 0;
1066         else
1067                 deadlines_met = (double)tbj->deadlines_met /
1068                     (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;
1069
1070         /* Messy nonsense to format the output nicely */
1071         if (average_latency >= 100)
1072                 log_output("%7.0f +/- ", average_latency);
1073         else
1074                 log_output("%7.3g +/- ", average_latency);
1075         if (sd >= 100)
1076                 log_output("%-9.0f", sd);
1077         else
1078                 log_output("%-9.3g", sd);
1079         if (max_latency >= 100)
1080                 log_output("%7.0f\t", max_latency);
1081         else
1082                 log_output("%7.3g\t", max_latency);
1083         log_output("\t%4.3g", samples_met);
1084         if (!th->nodeadlines)
1085                 log_output("\t%11.3g", deadlines_met);
1086         log_output("\n");
1087         sync_flush();
1088 }
1089
1090 void create_read_file(void)
1091 {
1092         unsigned int i;
1093         FILE *fp;
1094         char *name = "interbench.read";
1095         void *buf = NULL;
1096         struct stat statbuf;
1097         unsigned long mem, bsize;
1098         int tmp;
1099
1100         if ((tmp = open(name, O_RDONLY)) == -1) {
1101                 if (errno != ENOENT)
1102                         terminal_error("open");
1103                 goto write;
1104         }
1105         if (stat(name, &statbuf) == -1)
1106                 terminal_error("stat");
1107         if (statbuf.st_blksize < MIN_BLK_SIZE)
1108                 statbuf.st_blksize = MIN_BLK_SIZE;
1109         bsize = statbuf.st_blksize;
1110         if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
1111                 return;
1112         if (remove(name) == -1)
1113                 terminal_error("remove");
1114 write:
1115         fprintf(stderr,"Creating file for read load...\n");
1116         if (!(fp = fopen(name, "w")))
1117                 terminal_error("fopen");
1118         if (stat(name, &statbuf) == -1)
1119                 terminal_fileopen_error(fp, "stat");
1120         if (statbuf.st_blksize < MIN_BLK_SIZE)
1121                 statbuf.st_blksize = MIN_BLK_SIZE;
1122         bsize = statbuf.st_blksize;
1123         if (!(buf = calloc(1, bsize)))
1124                 terminal_fileopen_error(fp, "calloc");
1125         mem = ud.ram / (bsize / 1024);  /* kilobytes to blocks */
1126
1127         for (i = 0 ; i < mem; i++) {
1128                 if (fwrite(buf, bsize, 1, fp) != 1)
1129                         terminal_fileopen_error(fp, "fwrite");
1130         }
1131         if (fclose(fp) == -1)
1132                 terminal_error("fclose");
1133         sync_flush();
1134 }
1135
1136 void get_ram(void)
1137 {
1138         FILE *meminfo;
1139         char aux[256];
1140
1141         if(!(meminfo = fopen("/proc/meminfo", "r")))
1142                 terminal_error("fopen");
1143
1144         ud.ram = ud.swap = 0;
1145         while( !feof(meminfo) && !fscanf(meminfo, "MemTotal: %lu kB", &ud.ram) )
1146             fgets(aux,sizeof(aux),meminfo);
1147         while( !feof(meminfo) && !fscanf(meminfo, "SwapTotal: %lu kB", &ud.swap) )
1148             fgets(aux,sizeof(aux),meminfo);
1149         if (fclose(meminfo) == -1)
1150                 terminal_error("fclose");
1151
1152         if( !ud.ram || !ud.swap ) {
1153                 unsigned long i;
1154                 fprintf(stderr, "\nCould not get memory or swap size. ");
1155                 fprintf(stderr, "Will not perform mem_load\n");
1156                 for (i = 0 ; i < THREADS ; i++) {
1157                         if (threadlist[i].label == "Memload") {
1158                                 threadlist[i].load = 0;
1159                                 threadlist[i].rtload = 0;
1160                         }
1161                 }
1162         }
1163 }
1164
1165 void get_logfilename(void)
1166 {
1167         struct tm *mytm;
1168         struct utsname buf;
1169         time_t t;
1170         int year, month, day, hours, minutes;
1171
1172         time(&t);
1173         if (uname(&buf) == -1)
1174                 terminal_error("uname");
1175         if (!(mytm = localtime(&t)))
1176                 terminal_error("localtime");
1177         year = mytm->tm_year + 1900;
1178         month = mytm->tm_mon + 1;
1179         day = mytm->tm_mday;
1180         hours = mytm->tm_hour;
1181         minutes = mytm->tm_min;
1182         strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);
1183
1184         sprintf(ud.datestamp, "%2d%02d%02d%02d%02d",
1185                 year, month, day, hours, minutes);
1186         snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
1187 }
1188
1189 void start_thread(struct thread *th)
1190 {
1191         post_sem(&th->sem.start);
1192 }
1193
1194 void stop_thread(struct thread *th)
1195 {
1196         post_sem(&th->sem.stop);
1197         wait_sem(&th->sem.complete);
1198
1199         /* Kill the thread */
1200         join_pthread(th->pthread, NULL);
1201 }
1202
/*
 * Initialise sem as a semaphore with an initial count of zero that is not
 * shared between processes; failure goes to terminal_error().
 */
void init_sem(sem_t *sem)
{
        int rc = sem_init(sem, 0, 0);

        if (rc != 0)
                terminal_error("sem_init");
}
1208
1209 void init_all_sems(struct sems *s)
1210 {
1211         /* Initialise the semaphores */
1212         init_sem(&s->ready);
1213         init_sem(&s->start);
1214         init_sem(&s->stop);
1215         init_sem(&s->complete);
1216         init_sem(&s->stopchild);
1217 }
1218
1219 void initialise_thread(int i)
1220 {
1221         struct thread *th = &threadlist[i];
1222
1223         init_all_sems(&th->sem);
1224         /* Create the threads. Yes, the (long) cast is fugly but it's safe*/
1225         create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);
1226
1227         wait_sem(&th->sem.ready);
1228         /*
1229          * We set this pointer generically to NOT_BENCHING and set it to the
1230          * benchmarked array entry only on benched threads.
1231          */
1232         th->dt = &th->benchmarks[NOT_BENCHING];
1233         initialise_thread_data(th->dt);
1234
1235 }
1236
/*
 * A pseudo-semaphore for processes using a pipe: block until one int has
 * been read from pype. A read that returns 0 terminates the program.
 */
void wait_on(int pype)
{
        int token = 0;
        int got = Read(pype, &token, sizeof(token));

        if (got != 0)
                return;
        fprintf(stderr, "\nread returned 0\n");
        exit (1);
}
1248
/*
 * Counterpart of wait_on(): wake the process waiting on the other end by
 * writing one int into pype. A write that returns 0 terminates the program.
 */
void wakeup_with(int pype)
{
        int token = 1;
        int sent = Write(pype, &token, sizeof(token));

        if (sent != 0)
                return;
        fprintf(stderr, "\nwrite returned 0\n");
        exit (1);
}
1259
1260 void run_loadchild(int j)
1261 {
1262         struct thread *thj;
1263         thj = &threadlist[j];
1264
1265         set_nice(ud.load_nice);
1266         initialise_thread(j);
1267
1268         /* Tell main we're ready */
1269         wakeup_with(l2m[1]);
1270
1271         /* Main tells us we're ready */
1272         wait_on(m2l[0]);
1273         start_thread(thj);
1274
1275         /* Tell main we received the start and are running */
1276         wakeup_with(l2m[1]);
1277
1278         /* Main tells us to stop */
1279         wait_on(m2l[0]);
1280         stop_thread(thj);
1281
1282         /* Tell main we've finished */
1283         wakeup_with(l2m[1]);
1284         exit (0);
1285 }
1286
1287 void run_benchchild(int i, int j)
1288 {
1289         struct thread *thi;
1290
1291         thi = &threadlist[i];
1292
1293         set_nice(ud.bench_nice);
1294         if (ud.do_rt)
1295                 set_mlock();
1296         initialise_thread(i);
1297         /* Point the data table to the appropriate load being tested */
1298         thi->dt = &thi->benchmarks[j];
1299         initialise_thread_data(thi->dt);
1300         if (ud.do_rt)
1301                 set_thread_fifo(thi->pthread, 95);
1302
1303         /* Tell main we're ready */
1304         wakeup_with(b2m[1]);
1305
1306         /* Main tells us we're ready */
1307         wait_on(m2b[0]);
1308         start_thread(thi);
1309
1310         /* Tell main we have started */
1311         wakeup_with(b2m[1]);
1312
1313         /* Main tells us to stop */
1314         wait_on(m2b[0]);
1315         stop_thread(thi);
1316
1317         if (ud.do_rt) {
1318                 set_thread_normal(thi->pthread);
1319                 set_munlock();
1320         }
1321         show_latencies(thi);
1322
1323         /* Tell main we've finished */
1324         wakeup_with(b2m[1]);
1325         exit(0);
1326 }
1327
1328 void bench(int i, int j)
1329 {
1330         pid_t bench_pid, load_pid;
1331
1332         if ((load_pid = fork()) == -1)
1333                 terminal_error("fork");
1334         if (!load_pid)
1335                 run_loadchild(j);
1336
1337         /* Wait for load process to be ready */
1338
1339         wait_on(l2m[0]);
1340         if ((bench_pid = fork()) == -1)
1341                 terminal_error("fork");
1342         if (!bench_pid)
1343                 run_benchchild(i, j);
1344
1345         /* Wait for bench process to be ready */
1346         wait_on(b2m[0]);
1347
1348         /*
1349          * We want to be higher priority than everything to signal them to
1350          * stop and we lock our memory if we can as well
1351          */
1352         set_fifo(99);
1353         set_mlock();
1354
1355         /* Wakeup the load process */
1356         wakeup_with(m2l[1]);
1357         /* Load tells it has received the first message and is running */
1358         wait_on(l2m[0]);
1359
1360         /* After a small delay, wake up the benched process */
1361         sleep(1);
1362         wakeup_with(m2b[1]);
1363
1364         /* Bench tells it has received the first message and is running */
1365         wait_on(b2m[0]);
1366         microsleep(ud.duration * 1000000);
1367
1368         /* Tell the benched process to stop its threads and output results */
1369         wakeup_with(m2b[1]);
1370
1371         /* Tell the load process to stop its threads */
1372         wakeup_with(m2l[1]);
1373
1374         /* Return to SCHED_NORMAL */
1375         set_normal();
1376         set_munlock();
1377
1378         /* Wait for load and bench processes to terminate */
1379         wait_on(l2m[0]);
1380         wait_on(b2m[0]);
1381 }
1382
/* Create a pipe in pype[0..1]; failure goes to terminal_error() */
void init_pipe(int *pype)
{
        int rc = pipe(pype);

        if (rc == -1)
                terminal_error("pipe");
}
1388
1389 void init_pipes(void)
1390 {
1391         init_pipe(m2l);
1392         init_pipe(l2m);
1393         init_pipe(m2b);
1394         init_pipe(b2m);
1395 }
1396
1397 void usage(void)
1398 {
1399         /* Affinity commented out till working on all architectures */
1400         fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
1401         fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n");
1402         fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
1403         fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
1404         fprintf(stderr, "\t[-h\]\n\n");
1405         fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
1406         fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
1407         fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
1408         fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
1409         fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
1410         //fprintf(stderr, " -u\tImitate uniprocessor\n");
1411         fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
1412         fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
1413         fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
1414         fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
1415         fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
1416         fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
1417         fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
1418         fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
1419         fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
1420         fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
1421         fprintf(stderr, " -h\tShow this help\n");
1422         fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n");
1423 }
1424
#ifdef DEBUG
/*
 * SIGCHLD handler, DEBUG builds only: reap every child whose state has
 * changed and terminate the program with status 1 if one of them did not
 * exit cleanly with status 0.
 */
void deadchild(int crap)
{
        pid_t retval;
        int status;
        int saved_errno = errno;        /* waitpid() below may clobber it */

        (void)crap;     /* the signal number is not needed */

        /*
         * One SIGCHLD can stand for several children, so keep reaping.
         * waitpid() returning 0 means no child has changed state; status is
         * not written in that case and must not be inspected.
         */
        while ((retval = waitpid(-1, &status, WNOHANG)) > 0) {
                if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
                        continue;
                fprintf(stderr, "\nChild terminated abnormally ");
                if (WIFSIGNALED(status))
                        fprintf(stderr, "with signal %d", WTERMSIG(status));
                fprintf(stderr, "\n");
                exit (1);
        }
        /* Having no children left to wait for is not an error */
        if (retval == -1 && errno != ECHILD)
                terminal_error("waitpid");
        errno = saved_errno;
}
#endif
1447
1448 int load_index(const char* loadname)
1449 {
1450         int i;
1451
1452         for (i = 0 ; i < THREADS ; i++)
1453                 if (strcasecmp(loadname, threadlist[i].label) == 0)
1454                         return i;
1455         return -1;
1456 }
1457
/*
 * Return 1 if bit number index is set in mask, 0 otherwise. An index that
 * is negative or beyond the width of unsigned int is reported as not set.
 *
 * This is deliberately an ordinary function: under C99 and later a plain
 * `inline` definition does not emit an external symbol, which breaks the
 * link whenever the compiler decides not to inline a call.
 */
int bit_is_on(const unsigned int mask, int index)
{
        /* Shifting by a negative or too large count is undefined */
        if (index < 0 || (unsigned int)index >= sizeof(mask) * 8)
                return 0;
        /* 1u keeps the shift unsigned so the top bit is usable too */
        return (mask & (1u << index)) != 0;
}
1462
/*
 * Set bit number index in *mask. An index that is negative or beyond the
 * width of unsigned int leaves *mask untouched.
 *
 * This is deliberately an ordinary function: under C99 and later a plain
 * `inline` definition does not emit an external symbol, which breaks the
 * link whenever the compiler decides not to inline a call.
 */
void set_bit_on(unsigned int *mask, int index)
{
        /* Shifting by a negative or too large count is undefined */
        if (index < 0 || (unsigned int)index >= sizeof(*mask) * 8)
                return;
        /* 1u keeps the shift unsigned so the top bit is usable too */
        *mask |= (1u << index);
}
1467
1468 int main(int argc, char **argv)
1469 {
1470         unsigned long custom_cpu = 0;
1471         int q, i, j, affinity, benchmark = 0;
1472         unsigned int selected_loads = 0;
1473         unsigned int excluded_loads = 0;
1474         unsigned int selected_benches = 0;
1475         unsigned int excluded_benches = 0;
1476         FILE *fp;
1477         /*
1478          * This file stores the loops_per_ms to be reused in a filename that
1479          * can't be confused
1480          */
1481         char *fname = "interbench.loops_per_ms";
1482         char *comment = NULL;
1483 #ifdef DEBUG
1484         feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
1485         if (signal(SIGCHLD, deadchild) == SIG_ERR)
1486                 terminal_error("signal");
1487 #endif
1488
1489         while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
1490                 switch (q) {
1491                         case 'h':
1492                                 usage();
1493                                 return (0);
1494                         case 'l':
1495                                 ud.loops_per_ms = atoi(optarg);
1496                                 break;
1497                         case 't':
1498                                 ud.duration = atoi(optarg);
1499                                 break;
1500                         case 'L':
1501                                 ud.cpu_load = atoi(optarg);
1502                                 break;
1503                         case 'B':
1504                                 ud.bench_nice = atoi(optarg);
1505                                 break;
1506                         case 'N':
1507                                 ud.load_nice = atoi(optarg);
1508                                 break;
1509                         case 'u':
1510                                 affinity = 1;
1511                                 break;
1512                         case 'b':
1513                                 benchmark = 1;
1514                                 break;
1515                         case 'c':
1516                                 ud.log = 0;
1517                                 break;
1518                         case 'r':
1519                                 ud.do_rt = 1;
1520                                 break;
1521                         case 'C':
1522                                 custom_cpu = (unsigned long)atol(optarg);
1523                                 break;
1524                         case 'I':
1525                                 ud.custom_interval = atol(optarg);
1526                                 break;
1527                         case 'm':
1528                                 comment = optarg;
1529                                 break;
1530                         case 'w':
1531                                 i = load_index(optarg);
1532                                 if (i == -1) {
1533                                         fprintf(stderr, "Unknown load \"%s\"\n", optarg);
1534                                         return (-2);
1535                                 }
1536                                 set_bit_on(&selected_loads, i);
1537                                 break;
1538                         case 'x':
1539                                 i = load_index(optarg);
1540                                 if (i == -1) {
1541                                         fprintf(stderr, "Unknown load \"%s\"\n", optarg);
1542                                         return (-2);
1543                                 }
1544                                 set_bit_on(&excluded_loads, i);
1545                                 break;
1546                         case 'W':
1547                                 i = load_index(optarg);
1548                                 if (i == -1) {
1549                                         fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
1550                                         return (-2);
1551                                 }
1552                                 set_bit_on(&selected_benches, i);
1553                                 break;
1554                         case 'X':
1555                                 i = load_index(optarg);
1556                                 if (i == -1) {
1557                                         fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
1558                                         return (-2);
1559                                 }
1560                                 set_bit_on(&excluded_benches, i);
1561                                 break;
1562                         default:
1563                                 usage();
1564                                 return (1);
1565                 }
1566         }
1567         argc -= optind;
1568         argv += optind;
1569         /* default is all loads */
1570         if (selected_loads == 0)
1571                 selected_loads = (unsigned int)-1;
1572         selected_loads &= ~excluded_loads;
1573         /* default is all benches */
1574         if (selected_benches == 0)
1575                 selected_benches = (unsigned int)-1;
1576         selected_benches &= ~excluded_benches;
1577
1578         if (!test_fifo()) {
1579                 fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
1580                 fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
1581                 if (ud.do_rt) {
1582                         fprintf(stderr, "Real time tests were requested, aborting.\n");
1583                         exit (1);
1584                 }
1585                 fprintf(stderr, "Results will be unreliable.\n");
1586         }
1587         if (!ud.cpu_load) {
1588                 fprintf(stderr, "Invalid cpu load\n");
1589                 exit (1);
1590         }
1591
1592         if ((custom_cpu && !ud.custom_interval) ||
1593                 (ud.custom_interval && !custom_cpu) ||
1594                 custom_cpu > 100) {
1595                         fprintf(stderr, "Invalid custom values, aborting.\n");
1596                         exit (1);
1597         }
1598
1599         if (custom_cpu && ud.custom_interval) {
1600                 ud.custom_run = ud.custom_interval * custom_cpu / 100;
1601                 threadlist[CUSTOM].bench = 1;
1602                 threadlist[CUSTOM].load = 1;
1603                 threadlist[CUSTOM].rtbench = 1;
1604                 threadlist[CUSTOM].rtload = 1;
1605         }
1606
1607         /*FIXME Affinity commented out till working on all architectures */
1608 #if 0
1609         if (affinity) {
1610 #ifdef CPU_SET  /* Current glibc expects cpu_set_t */
1611                 cpu_set_t cpumask;
1612
1613                 CPU_ZERO(&cpumask);
1614                 CPU_SET(0, &cpumask);
1615 #else           /* Old glibc expects unsigned long */
1616                 unsigned long cpumask = 1;
1617 #endif
1618                 if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
1619                         if (errno != EPERM)
1620                                 terminal_error("sched_setaffinity");
1621                         fprintf(stderr, "could not set cpu affinity\n");
1622                 }
1623         }
1624 #endif
1625
1626         /* Make benchmark a multiple of 10 seconds for proper range of X loads */
1627         if (ud.duration % 10)
1628                 ud.duration += 10 - ud.duration % 10;
1629
1630         if (benchmark)
1631                 ud.loops_per_ms = 0;
1632         /*
1633          * Try to get loops_per_ms from command line first, file second, and
1634          * benchmark if not available.
1635          */
1636         if (!ud.loops_per_ms) {
1637                 if (benchmark)
1638                         goto bench;
1639                 if ((fp = fopen(fname, "r"))) {
1640                         fscanf(fp, "%lu", &ud.loops_per_ms);
1641                         if (fclose(fp) == -1)
1642                                 terminal_error("fclose");
1643                         if (ud.loops_per_ms) {
1644                                 fprintf(stderr,
1645                                         "%lu loops_per_ms read from file interbench.loops_per_ms\n",
1646                                         ud.loops_per_ms);
1647                                 goto loops_known;
1648                         }
1649                 } else
1650                         if (errno != ENOENT)
1651                                 terminal_error("fopen");
1652 bench:
1653                 fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");
1654
1655                 /*
1656                  * To get as accurate a loop as possible we time it running
1657                  * SCHED_FIFO if we can
1658                  */
1659                 set_fifo(99);
1660                 calibrate_loop();
1661                 set_normal();
1662         } else
1663                 fprintf(stderr, "loops_per_ms specified from command line\n");
1664
1665         if (!(fp = fopen(fname, "w"))) {
1666                 if (errno != EACCES)    /* No write access is not terminal */
1667                         terminal_error("fopen");
1668                 fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
1669                 goto loops_known;
1670         }
1671         fprintf(fp, "%lu", ud.loops_per_ms);
1672         fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
1673                 ud.loops_per_ms);
1674         if (fclose(fp) == -1)
1675                 terminal_error("fclose");
1676
1677 loops_known:
1678         get_ram();
1679         get_logfilename();
1680         create_read_file();
1681         init_pipes();
1682
1683         if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
1684                 if (errno != EACCES)
1685                         terminal_error("fopen");
1686                 fprintf(stderr, "Unable to write to logfile\n");
1687                 ud.log = 0;
1688         }
1689         log_output("\n");
1690         log_output("Using %lu loops per ms, running every load for %d seconds\n",
1691                 ud.loops_per_ms, ud.duration);
1692         log_output("Benchmarking kernel %s at datestamp %s\n",
1693                 ud.unamer, ud.datestamp);
1694         if (comment)
1695                 log_output("Comment: %s\n", comment);
1696         log_output("\n");
1697
1698         for (i = 0 ; i < THREADS ; i++)
1699                 threadlist[i].threadno = i;
1700
1701         for (i = 0 ; i < THREADS ; i++) {
1702                 struct thread *thi = &threadlist[i];
1703                 int *benchme;
1704
1705                 if (ud.do_rt)
1706                         benchme = &threadlist[i].rtbench;
1707                 else
1708                         benchme = &threadlist[i].bench;
1709
1710                 if (!*benchme || !bit_is_on(selected_benches, i))
1711                         continue;
1712
1713                 log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
1714                 if (ud.do_rt)
1715                         log_output("real time ");
1716                 else if (ud.bench_nice)
1717                         log_output("nice %d ", ud.bench_nice);
1718                 log_output("in the presence of simulated ");
1719                 if (ud.load_nice)
1720                         log_output("nice %d ", ud.load_nice);
1721                 log_output("---\n");
1722
1723                 log_output("Load");
1724                 if (ud.do_rt)
1725                         log_output("\tLatency +/- SD (us)");
1726                 else
1727                         log_output("\tLatency +/- SD (ms)");
1728                 log_output("  Max Latency ");
1729                 log_output("  %% Desired CPU");
1730                 if (!thi->nodeadlines)
1731                         log_output("  %% Deadlines Met");
1732                 log_output("\n");
1733
1734                 for (j = 0 ; j < THREADS ; j++) {
1735                         struct thread *thj = &threadlist[j];
1736
1737                         if (j == i || !bit_is_on(selected_loads, j) ||
1738                                 (!threadlist[j].load && !ud.do_rt) ||
1739                                 (!threadlist[j].rtload && ud.do_rt))
1740                                         continue;
1741                         log_output("%s\t", thj->label);
1742                         sync_flush();
1743                         bench(i, j);
1744                 }
1745                 log_output("\n");
1746         }
1747         log_output("\n");
1748         if (ud.log)
1749                 fclose(ud.logfile);
1750
1751         return 0;
1752 }