1 /*******************************************
3 * Interbench - Interactivity benchmark
5 * Author: Con Kolivas <kernel@kolivas.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *******************************************/
24 #define _FILE_OFFSET_BITS 64 /* Large file support */
25 #define INTERBENCH_VERSION "0.30"
37 #include <semaphore.h>
42 #include <sys/utsname.h>
44 #include <sys/resource.h>
45 #include <sys/types.h>
48 #include "interbench.h"
50 #define MAX_UNAME_LENGTH 100
51 #define MAX_LOG_LENGTH ((MAX_UNAME_LENGTH) + 4)
52 #define MIN_BLK_SIZE 1024
53 #define DEFAULT_RESERVE 64
54 #define MB (1024 * 1024) /* 2^20 bytes */
56 #define MAX_MEM_IN_MB (1024 * 64) /* 64 GB */
59 unsigned long loops_per_ms;
60 unsigned long ram, swap;
65 unsigned long custom_run;
66 unsigned long custom_interval;
67 unsigned long cpu_load;
68 char logfilename[MAX_LOG_LENGTH];
70 char unamer[MAX_UNAME_LENGTH];
79 /* Pipes main to/from load and bench processes */
80 static int m2l[2], l2m[2], m2b[2], b2m[2];
82 /* Which member of benchmarks is used when not benchmarking */
83 #define NOT_BENCHING (THREADS)
84 #define CUSTOM (THREADS - 1)
87 * To add another load or a benchmark you need to increment the value of
88 * THREADS, add a function prototype for your function and add an entry to
89 * the threadlist. To specify whether the function is a benchmark or a load
90 * set the benchmark and/or load flag as appropriate. The basic requirements
91 * of a new load can be seen by using emulate_none as a template.
94 void emulate_none(struct thread *th);
95 void emulate_audio(struct thread *th);
96 void emulate_video(struct thread *th);
97 void emulate_x(struct thread *th);
98 void emulate_game(struct thread *th);
99 void emulate_burn(struct thread *th);
100 void emulate_write(struct thread *th);
101 void emulate_read(struct thread *th);
102 void emulate_ring(struct thread *th);
103 void emulate_compile(struct thread *th);
104 void emulate_memload(struct thread *th);
105 void emulate_hackbench(struct thread *th);
106 void emulate_custom(struct thread *th);
108 struct thread threadlist[THREADS] = {
109 {.label = "None", .name = emulate_none, .load = 1, .rtload = 1},
110 {.label = "Audio", .name = emulate_audio, .bench = 1, .rtbench = 1},
111 {.label = "Video", .name = emulate_video, .bench = 1, .rtbench = 1, .load = 1, .rtload = 1},
112 {.label = "X", .name = emulate_x, .bench = 1, .load = 1, .rtload = 1},
113 {.label = "Gaming", .name = emulate_game, .nodeadlines = 1, .bench = 1},
114 {.label = "Burn", .name = emulate_burn, .load = 1, .rtload = 1},
115 {.label = "Write", .name = emulate_write, .load = 1, .rtload = 1},
116 {.label = "Read", .name = emulate_read, .load = 1, .rtload = 1},
117 {.label = "Ring", .name = emulate_ring, .load = 0, .rtload = 0}, /* No useful data from this */
118 {.label = "Compile", .name = emulate_compile, .load = 1, .rtload = 1},
119 {.label = "Memload", .name = emulate_memload, .load = 1, .rtload = 1},
120 {.label = "Hack", .name = emulate_hackbench, .load = 0, .rtload = 0}, /* This is causing signal headaches */
121 {.label = "Custom", .name = emulate_custom}, /* Leave custom as last entry */
124 void init_sem(sem_t *sem);
125 void init_all_sems(struct sems *s);
126 void initialise_thread(int i);
127 void start_thread(struct thread *th);
128 void stop_thread(struct thread *th);
/* Print the failing call's errno message and abort the whole run. */
void terminal_error(const char *name)
{
	fprintf(stderr, "\n");
	perror(name);
	exit(1);
}
/*
 * Close the stream before bailing out so buffered data is not lost,
 * then report the original failure via terminal_error (never returns).
 * Fix: fclose() signals failure with EOF, not -1 (they coincide on most
 * platforms, but EOF is the documented contract).
 */
void terminal_fileopen_error(FILE *fp, char *name)
{
	if (fclose(fp) == EOF)
		terminal_error("fclose");
	terminal_error(name);
}
144 unsigned long long get_nsecs(struct timespec *myts)
146 if (clock_gettime(CLOCK_REALTIME, myts))
147 terminal_error("clock_gettime");
148 return (myts->tv_sec * 1000000000 + myts->tv_nsec );
151 unsigned long get_usecs(struct timespec *myts)
153 if (clock_gettime(CLOCK_REALTIME, myts))
154 terminal_error("clock_gettime");
155 return (myts->tv_sec * 1000000 + myts->tv_nsec / 1000 );
/*
 * Switch the calling process to SCHED_FIFO at the given priority.
 * EPERM (not running as root / no RT rlimit) is tolerated; any other
 * failure aborts.  NOTE(review): the tolerated-errno branch was elided
 * in the source seen here — assumed EPERM, confirm against upstream.
 */
void set_fifo(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
		if (errno != EPERM)
			terminal_error("sched_setscheduler");
	}
}
/* Lock current and future pages in RAM to keep paging out of the
 * latency measurements.  Failure is non-fatal by design. */
void set_mlock(void)
{
	int mlockflags = MCL_CURRENT | MCL_FUTURE;

	mlockall(mlockflags);	/* Is not critical if this fails */
}
/* Undo set_mlock(); unlike locking, failing to unlock is fatal. */
void set_munlock(void)
{
	if (munlockall() == -1)
		terminal_error("munlockall");
}
/*
 * Put a specific pthread on SCHED_FIFO at the given priority.
 * Fix: pthread_setschedparam() returns 0 on success and a positive
 * error number on failure — it never returns -1 and does not set errno,
 * so the old `== -1` test silently ignored every failure.
 */
void set_thread_fifo(pthread_t pthread, int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (pthread_setschedparam(pthread, SCHED_FIFO, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
/*
 * Drop the calling process back to SCHED_OTHER.  A failure here is
 * unexpected (we got RT in the first place) so it is only warned about.
 */
void set_normal(void)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
		fprintf(stderr, "Weird, could not unset RT scheduling!\n");
	}
}
/* Set the nice value of the calling process; abort on failure. */
void set_nice(int prio)
{
	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
		terminal_error("setpriority");
}
211 struct sched_param sp;
212 memset(&sp, 0, sizeof(sp));
213 sp.sched_priority = 99;
214 if (sched_setscheduler(0, SCHED_FIFO, &sp) == -1) {
216 terminal_error("sched_setscheduler");
219 if (sched_getscheduler(0) != SCHED_FIFO)
/*
 * Return a specific pthread to SCHED_OTHER.
 * Fix: pthread_setschedparam() reports failure by returning a positive
 * error number, never -1 — test the return value for non-zero instead.
 */
void set_thread_normal(pthread_t pthread)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = 0;
	if (pthread_setschedparam(pthread, SCHED_OTHER, &sp) != 0)
		terminal_error("pthread_setschedparam");
}
/* Flush all stdio streams to the kernel and push dirty pages to disk
 * so pending writes do not perturb the next benchmark. */
void sync_flush(void)
{
	if (fflush(NULL) == EOF)
		terminal_error("fflush");
	sync();
}
246 unsigned long compute_allocable_mem(void)
248 unsigned long total = ud.ram + ud.swap;
249 unsigned long usage = ud.ram * 110 / 100 ;
251 /* Leave at least DEFAULT_RESERVE free space and check for maths overflow. */
252 if (total - DEFAULT_RESERVE < usage)
253 usage = total - DEFAULT_RESERVE;
254 usage /= 1024; /* to megabytes */
260 void burn_loops(unsigned long loops)
265 * We need some magic here to prevent the compiler from optimising
266 * this loop away. Otherwise trying to emulate a fixed cpu load
267 * with this loop will not work.
269 for (i = 0 ; i < loops ; i++)
270 asm volatile("" : : : "memory");
273 /* Use this many usecs of cpu time */
274 void burn_usecs(unsigned long usecs)
276 unsigned long ms_loops;
278 ms_loops = ud.loops_per_ms / 1000 * usecs;
279 burn_loops(ms_loops);
/*
 * Sleep for at least `usecs` microseconds via nanosleep, resuming the
 * remaining time whenever the sleep is cut short by a signal (EINTR).
 */
void microsleep(unsigned long long usecs)
{
	struct timespec req, rem;

	rem.tv_sec = rem.tv_nsec = 0;
	req.tv_sec = usecs / 1000000;
	req.tv_nsec = (usecs - (req.tv_sec * 1000000)) * 1000;
continue_sleep:
	if ((nanosleep(&req, &rem)) == -1) {
		if (errno == EINTR) {
			if (rem.tv_sec || rem.tv_nsec) {
				req.tv_sec = rem.tv_sec;
				req.tv_nsec = rem.tv_nsec;
				goto continue_sleep;
			}
		} else
			terminal_error("nanosleep");
	}
}
307 * Yes, sem_post and sem_wait shouldn't return -1 but they do so we must
/* sem_post wrapper: retry on EINTR, abort on any real failure. */
inline void post_sem(sem_t *s)
{
retry:
	if ((sem_post(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_post");
	}
}
/* sem_wait wrapper: retry on EINTR, abort on any real failure. */
inline void wait_sem(sem_t *s)
{
retry:
	if ((sem_wait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("sem_wait");
	}
}
/*
 * sem_trywait wrapper: retries on EINTR; EAGAIN (semaphore not posted)
 * is a normal result and is passed through as -1, anything else aborts.
 */
inline int trywait_sem(sem_t *s)
{
	int ret;

retry:
	if ((ret = sem_trywait(s)) == -1) {
		if (errno == EINTR)
			goto retry;
		if (errno != EAGAIN)
			terminal_error("sem_trywait");
	}
	return ret;
}
/* read(2) wrapper: restarts on EINTR, aborts on other errors,
 * returns the byte count otherwise. */
inline ssize_t Read(int fd, void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = read(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("read");
	}
	return retval;
}
/*
 * write(2) wrapper: restarts on EINTR, aborts on other errors, returns
 * the byte count otherwise.
 * Fix: the call passed `&buf` — the address of the local pointer — so
 * it wrote sizeof-pointer garbage bytes instead of the caller's data.
 * Pass `buf` itself.
 */
inline ssize_t Write(int fd, const void *buf, size_t count)
{
	ssize_t retval;

retry:
	retval = write(fd, buf, count);
	if (retval == -1) {
		if (errno == EINTR)
			goto retry;
		terminal_error("write");
	}
	return retval;
}
372 unsigned long periodic_schedule(struct thread *th, unsigned long run_usecs,
373 unsigned long interval_usecs, unsigned long long deadline)
375 unsigned long long latency, missed_latency;
376 unsigned long long current_time;
377 struct tk_thread *tk;
378 struct data_table *tb;
379 struct timespec myts;
385 current_time = get_usecs(&myts);
386 if (current_time > deadline + tk->slept_interval)
387 latency = current_time - deadline- tk->slept_interval;
389 /* calculate the latency for missed frames */
392 current_time = get_usecs(&myts);
393 if (interval_usecs && current_time > deadline + interval_usecs) {
394 /* We missed the deadline even before we consumed cpu */
395 unsigned long intervals;
397 deadline += interval_usecs;
398 intervals = (current_time - deadline) /
401 tb->missed_deadlines += intervals;
402 missed_latency = intervals * interval_usecs;
403 deadline += intervals * interval_usecs;
404 tb->missed_burns += intervals;
408 burn_usecs(run_usecs);
409 current_time = get_usecs(&myts);
410 tb->achieved_burns++;
413 * If we meet the deadline we move the deadline forward, otherwise
414 * we consider it a missed deadline and dropped frame etc.
416 deadline += interval_usecs;
417 if (deadline >= current_time) {
420 if (interval_usecs) {
421 unsigned long intervals = (current_time - deadline) /
424 tb->missed_deadlines += intervals;
425 missed_latency = intervals * interval_usecs;
426 deadline += intervals * interval_usecs;
428 tb->missed_burns += intervals;
430 deadline = current_time;
435 tk->sleep_interval = deadline - current_time;
437 post_sem(&tk->sem.start);
438 wait_sem(&tk->sem.complete);
441 * Must add missed_latency to total here as this function may not be
442 * called again and the missed latency can be lost
444 latency += missed_latency;
445 if (latency > tb->max_latency)
446 tb->max_latency = latency;
447 tb->total_latency += latency;
448 tb->sum_latency_squared += latency * latency;
454 void initialise_thread_data(struct data_table *tb)
458 tb->sum_latency_squared =
460 tb->missed_deadlines =
/* pthread_create wrapper; any failure is fatal. */
void create_pthread(pthread_t * thread, pthread_attr_t * attr,
	void * (*start_routine)(void *), void *arg)
{
	if (pthread_create(thread, attr, start_routine, arg))
		terminal_error("pthread_create");
}
/* pthread_join wrapper; any failure is fatal. */
void join_pthread(pthread_t th, void **thread_return)
{
	if (pthread_join(th, thread_return))
		terminal_error("pthread_join");
}
478 void emulate_none(struct thread *th)
480 sem_t *s = &th->sem.stop;
484 #define AUDIO_INTERVAL (50000)
485 #define AUDIO_RUN (AUDIO_INTERVAL / 20)
486 /* We emulate audio by using 5% cpu and waking every 50ms */
487 void emulate_audio(struct thread *th)
489 unsigned long long deadline;
490 sem_t *s = &th->sem.stop;
491 struct timespec myts;
493 th->decasecond_deadlines = 1000000 / AUDIO_INTERVAL * 10;
494 deadline = get_usecs(&myts);
497 deadline = periodic_schedule(th, AUDIO_RUN, AUDIO_INTERVAL,
504 /* We emulate video by using 40% cpu and waking for 60fps */
505 #define VIDEO_INTERVAL (1000000 / 60)
506 #define VIDEO_RUN (VIDEO_INTERVAL * 40 / 100)
507 void emulate_video(struct thread *th)
509 unsigned long long deadline;
510 sem_t *s = &th->sem.stop;
511 struct timespec myts;
513 th->decasecond_deadlines = 1000000 / VIDEO_INTERVAL * 10;
514 deadline = get_usecs(&myts);
517 deadline = periodic_schedule(th, VIDEO_RUN, VIDEO_INTERVAL,
525 * We emulate X by running for a variable percentage of cpu from 0-100%
528 void emulate_x(struct thread *th)
530 unsigned long long deadline;
531 sem_t *s = &th->sem.stop;
532 struct timespec myts;
534 th->decasecond_deadlines = 100;
535 deadline = get_usecs(&myts);
539 for (i = 0 ; i <= 100 ; i++) {
541 deadline = periodic_schedule(th, i * 1000, j * 1000,
543 deadline += i * 1000;
551 * We emulate gaming by using 100% cpu and seeing how many frames (jobs
552 * completed) we can do in that time. Deadlines are meaningless with
553 * unlocked frame rates. We do not use periodic schedule because for
554 * this load because this never wants to sleep.
556 #define GAME_INTERVAL (100000)
557 #define GAME_RUN (GAME_INTERVAL)
558 void emulate_game(struct thread *th)
560 unsigned long long deadline, current_time, latency;
561 sem_t *s = &th->sem.stop;
562 struct timespec myts;
563 struct data_table *tb;
566 th->decasecond_deadlines = 1000000 / GAME_INTERVAL * 10;
569 deadline = get_usecs(&myts) + GAME_INTERVAL;
570 burn_usecs(GAME_RUN);
571 current_time = get_usecs(&myts);
572 /* use usecs instead of simple count for game burn statistics */
573 tb->achieved_burns += GAME_RUN;
574 if (current_time > deadline) {
575 latency = current_time - deadline;
576 tb->missed_burns += latency;
579 if (latency > tb->max_latency)
580 tb->max_latency = latency;
581 tb->total_latency += latency;
582 tb->sum_latency_squared += latency * latency;
589 void *burn_thread(void *t)
596 s = &th->sem.stopchild;
599 burn_loops(ud.loops_per_ms);
600 if (!trywait_sem(s)) {
608 /* Have ud.cpu_load threads burn cpu continuously */
609 void emulate_burn(struct thread *th)
611 sem_t *s = &th->sem.stop;
614 pthread_t burnthreads[ud.cpu_load];
617 for (i = 0 ; i < ud.cpu_load ; i++)
618 create_pthread(&burnthreads[i], NULL, burn_thread,
621 post_sem(&th->sem.stopchild);
622 for (i = 0 ; i < ud.cpu_load ; i++)
623 join_pthread(burnthreads[i], NULL);
626 /* Write a file the size of ram continuously */
627 void emulate_write(struct thread *th)
629 sem_t *s = &th->sem.stop;
631 char *name = "interbench.write";
636 if (!(fp = fopen(name, "w")))
637 terminal_error("fopen");
638 if (stat(name, &statbuf) == -1)
639 terminal_fileopen_error(fp, "stat");
640 if (statbuf.st_blksize < MIN_BLK_SIZE)
641 statbuf.st_blksize = MIN_BLK_SIZE;
642 mem = ud.ram / (statbuf.st_blksize / 1024); /* kilobytes to blocks */
643 if (!(buf = calloc(1, statbuf.st_blksize)))
644 terminal_fileopen_error(fp, "calloc");
645 if (fclose(fp) == -1)
646 terminal_error("fclose");
651 if (!(fp = fopen(name, "w")))
652 terminal_error("fopen");
653 if (stat(name, &statbuf) == -1)
654 terminal_fileopen_error(fp, "stat");
655 for (i = 0 ; i < mem; i++) {
656 if (fwrite(buf, statbuf.st_blksize, 1, fp) != 1)
657 terminal_fileopen_error(fp, "fwrite");
661 if (fclose(fp) == -1)
662 terminal_error("fclose");
666 if (fclose(fp) == -1)
667 terminal_error("fclose");
668 if (remove(name) == -1)
669 terminal_error("remove");
673 /* Read a file the size of ram continuously */
674 void emulate_read(struct thread *th)
676 sem_t *s = &th->sem.stop;
677 char *name = "interbench.read";
683 if ((tmp = open(name, O_RDONLY)) == -1)
684 terminal_error("open");
685 if (stat(name, &statbuf) == -1)
686 terminal_error("stat");
687 bsize = statbuf.st_blksize;
688 if (!(buf = malloc(bsize)))
689 terminal_error("malloc");
695 * We have to read the whole file before quitting the load
696 * to prevent the data being cached for the next read. This
697 * is also the reason the file is the size of physical ram.
699 while ((rd = Read(tmp , buf, bsize)) > 0);
702 if (lseek(tmp, (off_t)0, SEEK_SET) == -1)
703 terminal_error("lseek");
707 #define RINGTHREADS 4
709 struct thread ringthreads[RINGTHREADS];
711 void *ring_thread(void *t)
718 th = &ringthreads[i];
721 if (post_to == RINGTHREADS)
728 post_sem(&ringthreads[post_to].sem.start);
729 if (!trywait_sem(&s->stop))
733 post_sem(&ringthreads[post_to].sem.start);
734 post_sem(&s->complete);
738 /* Create a ring of 4 processes that wake each other up in a circle */
739 void emulate_ring(struct thread *th)
741 sem_t *s = &th->sem.stop;
744 for (i = 0 ; i < RINGTHREADS ; i++) {
745 init_all_sems(&ringthreads[i].sem);
746 create_pthread(&ringthreads[i].pthread, NULL,
747 ring_thread, (void*)(long) i);
750 wait_sem(&ringthreads[0].sem.ready);
751 post_sem(&ringthreads[0].sem.start);
753 for (i = 0 ; i < RINGTHREADS ; i++)
754 post_sem(&ringthreads[i].sem.stop);
755 for (i = 0 ; i < RINGTHREADS ; i++) {
756 wait_sem(&ringthreads[i].sem.complete);
757 join_pthread(ringthreads[i].pthread, NULL);
761 /* We emulate a compile by running burn, write and read threads simultaneously */
762 void emulate_compile(struct thread *th)
764 sem_t *s = &th->sem.stop;
765 unsigned long i, threads[3];
767 for (i = 0 ; i < THREADS ; i++) {
768 if (threadlist[i].label == "Burn")
770 if (threadlist[i].label == "Write")
772 if (threadlist[i].label == "Read")
775 for (i = 0 ; i < 3 ; i++) {
777 fprintf(stderr, "Can't find all threads for compile load\n");
781 for (i = 0 ; i < 3 ; i++) {
782 initialise_thread(threads[i]);
783 start_thread(&threadlist[threads[i]]);
786 for (i = 0 ; i < 3 ; i++)
787 stop_thread(&threadlist[threads[i]]);
790 int *grab_and_touch (char *block[], int i)
792 block[i] = (char *) malloc(MB);
795 return (memset(block[i], 1, MB));
798 /* We emulate a memory load by allocating and torturing 110% of available ram */
799 void emulate_memload(struct thread *th)
801 sem_t *s = &th->sem.stop;
802 unsigned long touchable_mem, i;
803 char *mem_block[MAX_MEM_IN_MB];
806 touchable_mem = compute_allocable_mem();
807 /* loop until we're killed, frobbing memory in various perverted ways */
809 for (i = 0; i < touchable_mem; i++) {
810 success = grab_and_touch(mem_block, i);
818 for (i = 0; i < touchable_mem; i++) {
819 memcpy(mem_block[i], mem_block[(i + touchable_mem / 2) %
824 for (i = 0; i < touchable_mem; i++) {
831 for (i = 0; i < touchable_mem; i++)
837 struct thread hackthread;
839 void emulate_hackbench(struct thread *th)
841 sem_t *s = &th->sem.stop;
843 init_all_sems(&hackthread.sem);
844 create_pthread(&hackthread.pthread, NULL, hackbench_thread, (void *) 0);
848 post_sem(&hackthread.sem.stop);
849 wait_sem(&hackthread.sem.complete);
851 join_pthread(hackthread.pthread, NULL);
854 #define CUSTOM_INTERVAL (ud.custom_interval)
855 #define CUSTOM_RUN (ud.custom_run)
856 void emulate_custom(struct thread *th)
858 unsigned long long deadline;
859 sem_t *s = &th->sem.stop;
860 struct timespec myts;
862 th->decasecond_deadlines = 1000000 / CUSTOM_INTERVAL * 10;
863 deadline = get_usecs(&myts);
866 deadline = periodic_schedule(th, CUSTOM_RUN, CUSTOM_INTERVAL,
873 void *timekeeping_thread(void *t)
876 struct tk_thread *tk;
878 struct timespec myts;
883 s = &th->tkthread.sem;
885 * If this timekeeping thread is that of a benchmarked thread we run
886 * even higher priority than the benched thread is if running real
887 * time. Otherwise, the load timekeeping thread, which does not need
888 * accurate accounting remains SCHED_NORMAL;
890 if (th->dt != &th->benchmarks[NOT_BENCHING])
892 /* These values must be changed at the appropriate places or race */
893 tk->sleep_interval = tk->slept_interval = 0;
897 unsigned long start_time, now;
899 if (!trywait_sem(&s->stop))
902 tk->slept_interval = 0;
903 start_time = get_usecs(&myts);
904 if (!trywait_sem(&s->stop))
906 if (tk->sleep_interval) {
907 unsigned long diff = 0;
908 microsleep(tk->sleep_interval);
909 now = get_usecs(&myts);
910 /* now should always be > start_time but... */
911 if (now > start_time) {
912 diff = now - start_time;
913 if (diff > tk->sleep_interval)
914 tk->slept_interval = diff -
918 tk->sleep_interval = 0;
919 post_sem(&s->complete);
926 * All the sleep functions such as nanosleep can only guarantee that they
927 * sleep for _at least_ the time requested. We work around this by having
928 * a high priority real time thread that accounts for the extra time slept
929 * in nanosleep. This allows wakeup latency of the tested thread to be
930 * accurate and reflect true scheduling delays.
932 void *emulation_thread(void *t)
935 struct tk_thread *tk;
936 struct sems *s, *tks;
945 /* Start the timekeeping thread */
946 create_pthread(&th->tk_pthread, NULL, timekeeping_thread,
948 /* Wait for timekeeping thread to be ready */
949 wait_sem(&tks->ready);
951 /* Tell main we're ready to start*/
954 /* Wait for signal from main to start thread */
957 /* Start the actual function being benched/or running as load */
960 /* Stop the timekeeping thread */
961 post_sem(&tks->stop);
962 post_sem(&tks->start);
963 join_pthread(th->tk_pthread, NULL);
965 /* Tell main we've finished */
966 post_sem(&s->complete);
971 * In an unoptimised loop we try to benchmark how many meaningless loops
972 * per second we can perform on this hardware to fairly accurately
973 * reproduce certain percentage cpu usage
975 void calibrate_loop(void)
977 unsigned long long start_time, loops_per_msec, run_time = 0;
979 struct timespec myts;
981 loops_per_msec = 100000;
983 /* Calibrate to within 1% accuracy */
984 while (run_time > 1010000 || run_time < 990000) {
985 loops = loops_per_msec;
986 start_time = get_nsecs(&myts);
988 run_time = get_nsecs(&myts) - start_time;
989 loops_per_msec = (1000000 * loops_per_msec / run_time ? :
993 /* Rechecking after a pause increases reproducibility */
995 loops = loops_per_msec;
996 start_time = get_nsecs(&myts);
998 run_time = get_nsecs(&myts) - start_time;
1000 /* Tolerate 5% difference on checking */
1001 if (run_time > 1050000 || run_time < 950000)
1004 ud.loops_per_ms = loops_per_msec;
1007 void log_output(const char *format, ...) __attribute__ ((format(printf, 1, 2)));
1009 /* Output to console +/- logfile */
1010 void log_output(const char *format, ...)
1014 va_start(ap, format);
1015 if (vprintf(format, ap) == -1)
1016 terminal_error("vprintf");
1019 va_start(ap, format);
1020 if (vfprintf(ud.logfile, format, ap) == -1)
1021 terminal_error("vpfrintf");
1027 /* Calculate statistics and output them */
1028 void show_latencies(struct thread *th)
1030 struct data_table *tbj;
1031 struct tk_thread *tk;
1032 double average_latency, deadlines_met, samples_met, sd, max_latency;
1033 long double variance = 0;
1038 if (tbj->nr_samples > 1) {
1039 average_latency = tbj->total_latency / tbj->nr_samples;
1040 variance = (tbj->sum_latency_squared - (average_latency *
1041 average_latency) / tbj->nr_samples) / (tbj->nr_samples - 1);
1042 sd = sqrtl(variance);
1044 average_latency = tbj->total_latency;
1049 * Landing on the boundary of a deadline can make loaded runs appear
1050 * to do more work than unloaded due to tiny duration differences.
1052 if (tbj->achieved_burns > 0)
1053 samples_met = (double)tbj->achieved_burns /
1054 (double)(tbj->achieved_burns + tbj->missed_burns) * 100;
1057 max_latency = tbj->max_latency;
1058 /* When benchmarking rt we represent the data in us */
1060 average_latency /= 1000;
1062 max_latency /= 1000;
1064 if (tbj->deadlines_met == 0)
1067 deadlines_met = (double)tbj->deadlines_met /
1068 (double)(tbj->missed_deadlines + tbj->deadlines_met) * 100;
1070 /* Messy nonsense to format the output nicely */
1071 if (average_latency >= 100)
1072 log_output("%7.0f +/- ", average_latency);
1074 log_output("%7.3g +/- ", average_latency);
1076 log_output("%-9.0f", sd);
1078 log_output("%-9.3g", sd);
1079 if (max_latency >= 100)
1080 log_output("%7.0f\t", max_latency);
1082 log_output("%7.3g\t", max_latency);
1083 log_output("\t%4.3g", samples_met);
1084 if (!th->nodeadlines)
1085 log_output("\t%11.3g", deadlines_met);
1090 void create_read_file(void)
1094 char *name = "interbench.read";
1096 struct stat statbuf;
1097 unsigned long mem, bsize;
1100 if ((tmp = open(name, O_RDONLY)) == -1) {
1101 if (errno != ENOENT)
1102 terminal_error("open");
1105 if (stat(name, &statbuf) == -1)
1106 terminal_error("stat");
1107 if (statbuf.st_blksize < MIN_BLK_SIZE)
1108 statbuf.st_blksize = MIN_BLK_SIZE;
1109 bsize = statbuf.st_blksize;
1110 if (statbuf.st_size / 1024 / bsize == ud.ram / bsize)
1112 if (remove(name) == -1)
1113 terminal_error("remove");
1115 fprintf(stderr,"Creating file for read load...\n");
1116 if (!(fp = fopen(name, "w")))
1117 terminal_error("fopen");
1118 if (stat(name, &statbuf) == -1)
1119 terminal_fileopen_error(fp, "stat");
1120 if (statbuf.st_blksize < MIN_BLK_SIZE)
1121 statbuf.st_blksize = MIN_BLK_SIZE;
1122 bsize = statbuf.st_blksize;
1123 if (!(buf = calloc(1, bsize)))
1124 terminal_fileopen_error(fp, "calloc");
1125 mem = ud.ram / (bsize / 1024); /* kilobytes to blocks */
1127 for (i = 0 ; i < mem; i++) {
1128 if (fwrite(buf, bsize, 1, fp) != 1)
1129 terminal_fileopen_error(fp, "fwrite");
1131 if (fclose(fp) == -1)
1132 terminal_error("fclose");
1141 if(!(meminfo = fopen("/proc/meminfo", "r")))
1142 terminal_error("fopen");
1144 ud.ram = ud.swap = 0;
1145 while( !feof(meminfo) && !fscanf(meminfo, "MemTotal: %lu kB", &ud.ram) )
1146 fgets(aux,sizeof(aux),meminfo);
1147 while( !feof(meminfo) && !fscanf(meminfo, "SwapTotal: %lu kB", &ud.swap) )
1148 fgets(aux,sizeof(aux),meminfo);
1149 if (fclose(meminfo) == -1)
1150 terminal_error("fclose");
1152 if( !ud.ram || !ud.swap ) {
1154 fprintf(stderr, "\nCould not get memory or swap size. ");
1155 fprintf(stderr, "Will not perform mem_load\n");
1156 for (i = 0 ; i < THREADS ; i++) {
1157 if (threadlist[i].label == "Memload") {
1158 threadlist[i].load = 0;
1159 threadlist[i].rtload = 0;
1165 void get_logfilename(void)
1170 int year, month, day, hours, minutes;
1173 if (uname(&buf) == -1)
1174 terminal_error("uname");
1175 if (!(mytm = localtime(&t)))
1176 terminal_error("localtime");
1177 year = mytm->tm_year + 1900;
1178 month = mytm->tm_mon + 1;
1179 day = mytm->tm_mday;
1180 hours = mytm->tm_hour;
1181 minutes = mytm->tm_min;
1182 strncpy(ud.unamer, buf.release, MAX_UNAME_LENGTH);
1184 sprintf(ud.datestamp, "%2d%02d%02d%02d%02d",
1185 year, month, day, hours, minutes);
1186 snprintf(ud.logfilename, MAX_LOG_LENGTH, "%s.log", ud.unamer);
1189 void start_thread(struct thread *th)
1191 post_sem(&th->sem.start);
1194 void stop_thread(struct thread *th)
1196 post_sem(&th->sem.stop);
1197 wait_sem(&th->sem.complete);
1199 /* Kill the thread */
1200 join_pthread(th->pthread, NULL);
/* Initialise one process-private semaphore to zero; fatal on failure. */
void init_sem(sem_t *sem)
{
	if (sem_init(sem, 0, 0))
		terminal_error("sem_init");
}
1209 void init_all_sems(struct sems *s)
1211 /* Initialise the semaphores */
1212 init_sem(&s->ready);
1213 init_sem(&s->start);
1215 init_sem(&s->complete);
1216 init_sem(&s->stopchild);
1219 void initialise_thread(int i)
1221 struct thread *th = &threadlist[i];
1223 init_all_sems(&th->sem);
1224 /* Create the threads. Yes, the (long) cast is fugly but it's safe*/
1225 create_pthread(&th->pthread, NULL, emulation_thread, (void*)(long)i);
1227 wait_sem(&th->sem.ready);
1229 * We set this pointer generically to NOT_BENCHING and set it to the
1230 * benchmarked array entry only on benched threads.
1232 th->dt = &th->benchmarks[NOT_BENCHING];
1233 initialise_thread_data(th->dt);
1237 /* A pseudo-semaphore for processes using a pipe */
/* Pseudo-semaphore "down": block reading one int from the pipe.  A
 * zero-byte read means the peer closed the pipe — treated as fatal. */
void wait_on(int pype)
{
	int retval, buf = 0;

	retval = Read(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nread returned 0\n");
		exit(1);
	}
}
/* Pseudo-semaphore "up": write one int into the pipe.  A zero-byte
 * write means the peer is gone — treated as fatal. */
void wakeup_with(int pype)
{
	int retval, buf = 1;

	retval = Write(pype, &buf, sizeof(buf));
	if (retval == 0) {
		fprintf(stderr, "\nwrite returned 0\n");
		exit(1);
	}
}
1260 void run_loadchild(int j)
1263 thj = &threadlist[j];
1265 set_nice(ud.load_nice);
1266 initialise_thread(j);
1268 /* Tell main we're ready */
1269 wakeup_with(l2m[1]);
1271 /* Main tells us we're ready */
1275 /* Tell main we received the start and are running */
1276 wakeup_with(l2m[1]);
1278 /* Main tells us to stop */
1282 /* Tell main we've finished */
1283 wakeup_with(l2m[1]);
1287 void run_benchchild(int i, int j)
1291 thi = &threadlist[i];
1293 set_nice(ud.bench_nice);
1296 initialise_thread(i);
1297 /* Point the data table to the appropriate load being tested */
1298 thi->dt = &thi->benchmarks[j];
1299 initialise_thread_data(thi->dt);
1301 set_thread_fifo(thi->pthread, 95);
1303 /* Tell main we're ready */
1304 wakeup_with(b2m[1]);
1306 /* Main tells us we're ready */
1310 /* Tell main we have started */
1311 wakeup_with(b2m[1]);
1313 /* Main tells us to stop */
1318 set_thread_normal(thi->pthread);
1321 show_latencies(thi);
1323 /* Tell main we've finished */
1324 wakeup_with(b2m[1]);
1328 void bench(int i, int j)
1330 pid_t bench_pid, load_pid;
1332 if ((load_pid = fork()) == -1)
1333 terminal_error("fork");
1337 /* Wait for load process to be ready */
1340 if ((bench_pid = fork()) == -1)
1341 terminal_error("fork");
1343 run_benchchild(i, j);
1345 /* Wait for bench process to be ready */
1349 * We want to be higher priority than everything to signal them to
1350 * stop and we lock our memory if we can as well
1355 /* Wakeup the load process */
1356 wakeup_with(m2l[1]);
1357 /* Load tells it has received the first message and is running */
1360 /* After a small delay, wake up the benched process */
1362 wakeup_with(m2b[1]);
1364 /* Bench tells it has received the first message and is running */
1366 microsleep(ud.duration * 1000000);
1368 /* Tell the benched process to stop its threads and output results */
1369 wakeup_with(m2b[1]);
1371 /* Tell the load process to stop its threads */
1372 wakeup_with(m2l[1]);
1374 /* Return to SCHED_NORMAL */
1378 /* Wait for load and bench processes to terminate */
/* pipe(2) wrapper; fatal on failure. */
void init_pipe(int *pype)
{
	if (pipe(pype) == -1)
		terminal_error("pipe");
}
1389 void init_pipes(void)
1399 /* Affinity commented out till working on all architectures */
1400 fprintf(stderr, "interbench v " INTERBENCH_VERSION " by Con Kolivas\n");
1401 fprintf(stderr, "interbench [-l <int>] [-L <int>] [-t <int] [-B <int>] [-N <int>]\n");
1402 fprintf(stderr, "\t[-b] [-c] [-r] [-C <int> -I <int>] [-m <comment>]\n");
1403 fprintf(stderr, "\t[-w <load type>] [-x <load type>] [-W <bench>] [-X <bench>]\n");
1404 fprintf(stderr, "\t[-h\]\n\n");
1405 fprintf(stderr, " -l\tUse <int> loops per sec (default: use saved benchmark)\n");
1406 fprintf(stderr, " -L\tUse cpu load of <int> with burn load (default: 4)\n");
1407 fprintf(stderr, " -t\tSeconds to run each benchmark (default: 30)\n");
1408 fprintf(stderr, " -B\tNice the benchmarked thread to <int> (default: 0)\n");
1409 fprintf(stderr, " -N\tNice the load thread to <int> (default: 0)\n");
1410 //fprintf(stderr, " -u\tImitate uniprocessor\n");
1411 fprintf(stderr, " -b\tBenchmark loops_per_ms even if it is already known\n");
1412 fprintf(stderr, " -c\tOutput to console only (default: use console and logfile)\n");
1413 fprintf(stderr, " -r\tPerform real time scheduling benchmarks (default: non-rt)\n");
1414 fprintf(stderr, " -C\tUse <int> percentage cpu as a custom load (default: no custom load)\n");
1415 fprintf(stderr, " -I\tUse <int> microsecond intervals for custom load (needs -C as well)\n");
1416 fprintf(stderr, " -m\tAdd <comment> to the log file as a separate line\n");
1417 fprintf(stderr, " -w\tAdd <load type> to the list of loads to be tested against\n");
1418 fprintf(stderr, " -x\tExclude <load type> from the list of loads to be tested against\n");
1419 fprintf(stderr, " -W\tAdd <bench> to the list of benchmarks to be tested\n");
1420 fprintf(stderr, " -X\tExclude <bench> from the list of benchmarks to be tested\n");
1421 fprintf(stderr, " -h\tShow this help\n");
1422 fprintf(stderr, "\nIf run without parameters interbench will run a standard benchmark\n\n");
1426 void deadchild(int crap)
1433 if ((retval = waitpid(-1, &status, WNOHANG)) == -1) {
1434 if (errno == ECHILD)
1436 terminal_error("waitpid");
1438 if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
1440 fprintf(stderr, "\nChild terminated abnormally ");
1441 if (WIFSIGNALED(status))
1442 fprintf(stderr, "with signal %d", WTERMSIG(status));
1443 fprintf(stderr, "\n");
1448 int load_index(const char* loadname)
1452 for (i = 0 ; i < THREADS ; i++)
1453 if (strcasecmp(loadname, threadlist[i].label) == 0)
1458 inline int bit_is_on(const unsigned int mask, int index)
1460 return (mask & (1 << index)) != 0;
1463 inline void set_bit_on(unsigned int *mask, int index)
1465 *mask |= (1 << index);
1468 int main(int argc, char **argv)
1470 unsigned long custom_cpu = 0;
1471 int q, i, j, affinity, benchmark = 0;
1472 unsigned int selected_loads = 0;
1473 unsigned int excluded_loads = 0;
1474 unsigned int selected_benches = 0;
1475 unsigned int excluded_benches = 0;
1478 * This file stores the loops_per_ms to be reused in a filename that
1481 char *fname = "interbench.loops_per_ms";
1482 char *comment = NULL;
1484 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
1485 if (signal(SIGCHLD, deadchild) == SIG_ERR)
1486 terminal_error("signal");
1489 while ((q = getopt(argc, argv, "hl:L:B:N:ut:bcnrC:I:m:w:x:W:X:")) != -1) {
1495 ud.loops_per_ms = atoi(optarg);
1498 ud.duration = atoi(optarg);
1501 ud.cpu_load = atoi(optarg);
1504 ud.bench_nice = atoi(optarg);
1507 ud.load_nice = atoi(optarg);
1522 custom_cpu = (unsigned long)atol(optarg);
1525 ud.custom_interval = atol(optarg);
1531 i = load_index(optarg);
1533 fprintf(stderr, "Unknown load \"%s\"\n", optarg);
1536 set_bit_on(&selected_loads, i);
1539 i = load_index(optarg);
1541 fprintf(stderr, "Unknown load \"%s\"\n", optarg);
1544 set_bit_on(&excluded_loads, i);
1547 i = load_index(optarg);
1549 fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
1552 set_bit_on(&selected_benches, i);
1555 i = load_index(optarg);
1557 fprintf(stderr, "Unknown bench \"%s\"\n", optarg);
1560 set_bit_on(&excluded_benches, i);
1569 /* default is all loads */
1570 if (selected_loads == 0)
1571 selected_loads = (unsigned int)-1;
1572 selected_loads &= ~excluded_loads;
1573 /* default is all benches */
1574 if (selected_benches == 0)
1575 selected_benches = (unsigned int)-1;
1576 selected_benches &= ~excluded_benches;
1579 fprintf(stderr, "Unable to get SCHED_FIFO (real time scheduling).\n");
1580 fprintf(stderr, "You either need to run this as root user or have support for real time RLIMITS.\n");
1582 fprintf(stderr, "Real time tests were requested, aborting.\n");
1585 fprintf(stderr, "Results will be unreliable.\n");
1588 fprintf(stderr, "Invalid cpu load\n");
1592 if ((custom_cpu && !ud.custom_interval) ||
1593 (ud.custom_interval && !custom_cpu) ||
1595 fprintf(stderr, "Invalid custom values, aborting.\n");
1599 if (custom_cpu && ud.custom_interval) {
1600 ud.custom_run = ud.custom_interval * custom_cpu / 100;
1601 threadlist[CUSTOM].bench = 1;
1602 threadlist[CUSTOM].load = 1;
1603 threadlist[CUSTOM].rtbench = 1;
1604 threadlist[CUSTOM].rtload = 1;
1607 /*FIXME Affinity commented out till working on all architectures */
1610 #ifdef CPU_SET /* Current glibc expects cpu_set_t */
1614 CPU_SET(0, &cpumask);
1615 #else /* Old glibc expects unsigned long */
1616 unsigned long cpumask = 1;
1618 if (sched_setaffinity(0, sizeof(cpumask), &cpumask) == -1) {
1620 terminal_error("sched_setaffinity");
1621 fprintf(stderr, "could not set cpu affinity\n");
1626 /* Make benchmark a multiple of 10 seconds for proper range of X loads */
1627 if (ud.duration % 10)
1628 ud.duration += 10 - ud.duration % 10;
1631 ud.loops_per_ms = 0;
1633 * Try to get loops_per_ms from command line first, file second, and
1634 * benchmark if not available.
1636 if (!ud.loops_per_ms) {
1639 if ((fp = fopen(fname, "r"))) {
1640 fscanf(fp, "%lu", &ud.loops_per_ms);
1641 if (fclose(fp) == -1)
1642 terminal_error("fclose");
1643 if (ud.loops_per_ms) {
1645 "%lu loops_per_ms read from file interbench.loops_per_ms\n",
1650 if (errno != ENOENT)
1651 terminal_error("fopen");
1653 fprintf(stderr, "loops_per_ms unknown; benchmarking...\n");
1656 * To get as accurate a loop as possible we time it running
1657 * SCHED_FIFO if we can
1663 fprintf(stderr, "loops_per_ms specified from command line\n");
1665 if (!(fp = fopen(fname, "w"))) {
1666 if (errno != EACCES) /* No write access is not terminal */
1667 terminal_error("fopen");
1668 fprintf(stderr, "Unable to write to file interbench.loops_per_ms\n");
1671 fprintf(fp, "%lu", ud.loops_per_ms);
1672 fprintf(stderr, "%lu loops_per_ms saved to file interbench.loops_per_ms\n",
1674 if (fclose(fp) == -1)
1675 terminal_error("fclose");
1683 if (ud.log && !(ud.logfile = fopen(ud.logfilename, "a"))) {
1684 if (errno != EACCES)
1685 terminal_error("fopen");
1686 fprintf(stderr, "Unable to write to logfile\n");
1690 log_output("Using %lu loops per ms, running every load for %d seconds\n",
1691 ud.loops_per_ms, ud.duration);
1692 log_output("Benchmarking kernel %s at datestamp %s\n",
1693 ud.unamer, ud.datestamp);
1695 log_output("Comment: %s\n", comment);
1698 for (i = 0 ; i < THREADS ; i++)
1699 threadlist[i].threadno = i;
1701 for (i = 0 ; i < THREADS ; i++) {
1702 struct thread *thi = &threadlist[i];
1706 benchme = &threadlist[i].rtbench;
1708 benchme = &threadlist[i].bench;
1710 if (!*benchme || !bit_is_on(selected_benches, i))
1713 log_output("--- Benchmarking simulated cpu of %s ", threadlist[i].label);
1715 log_output("real time ");
1716 else if (ud.bench_nice)
1717 log_output("nice %d ", ud.bench_nice);
1718 log_output("in the presence of simulated ");
1720 log_output("nice %d ", ud.load_nice);
1721 log_output("---\n");
1725 log_output("\tLatency +/- SD (us)");
1727 log_output("\tLatency +/- SD (ms)");
1728 log_output(" Max Latency ");
1729 log_output(" %% Desired CPU");
1730 if (!thi->nodeadlines)
1731 log_output(" %% Deadlines Met");
1734 for (j = 0 ; j < THREADS ; j++) {
1735 struct thread *thj = &threadlist[j];
1737 if (j == i || !bit_is_on(selected_loads, j) ||
1738 (!threadlist[j].load && !ud.do_rt) ||
1739 (!threadlist[j].rtload && ud.do_rt))
1741 log_output("%s\t", thj->label);