setitimer late in delivering signals
site_archiver@lists.apple.com Delivered-To: darwin-dev@lists.apple.com Dkim-signature: a=rsa-sha1; c=relaxed/relaxed; d=gmail.com; s=beta; h=domainkey-signature:received:received:message-id:date:from:to:subject:mime-version:content-type:content-transfer-encoding:content-disposition; b=LAHOh6aBXTSgWMP7ZCoqRG0RPmhRgemH+0m2IhjWUynvBqxWqFVPiQZOSA6CcrzJfZpvrL0RVC9FkUPQ8Y4R5hMUFpBfzZQXPeO2OHxpNT+QkODiClg4KDY7pyN8RU/SnHmrvSjuEGBSjEV3Y6QRADV/lks+VVtEYMsw7CgDfy8= Domainkey-signature: a=rsa-sha1; c=nofws; d=gmail.com; s=beta; h=received:message-id:date:from:to:subject:mime-version:content-type:content-transfer-encoding:content-disposition; b=OaS4pC/Tjqj4CzzTW+kbUz3wYTskQLG+iaR2UgVSPiddwhhow5r36/4G+5P+gCSJhe7Wx/I7w73pgB1di5E3AbQUv7KXjYQcGLqQxE51xlefOCbUM7DSdvRk1rppUN1EJZalwGpbHTrodGRZpFYyO2A7XaiLcjA8MO/UEfuW0qc= Hi, I have an application that uses setitimer(ITIMER_PROF,...) and sigaction(SIGPROF,...) for profiling. On a dual-core/dual-proc Intel MacPro (MacOSX 10.4.10), I was observing that the profiler would report 5s of CPU time (based on the number of SIGPROFs handled), but /usr/bin/time was clearly indicating that the application was using 136s of CPU time. A little investigating shows that, while setitimer was called with a 10000 us interval, there were a number of occasions where more than *10 seconds* elapsed between invocations of the SIGPROF handler. This short program demonstrates the problem: /* begin test.c */ #include <stdlib.h> #include <stdio.h> #include <signal.h> #include <unistd.h> #include <sys/time.h> #include <sys/resource.h> void timevalPlus (struct timeval *tv1, struct timeval *tv2, struct timeval *tv) { const long int million = 1000000; long int sec, usec; sec = tv1->tv_sec + tv2->tv_sec; usec = tv1->tv_usec + tv2->tv_usec; sec += (usec / million); usec %= million; tv->tv_sec = sec; tv->tv_usec = usec; } void timevalMinus (struct timeval *tv1, struct timeval *tv2, struct timeval *tv) { const long int million = 1000000; long int sec, usec; sec = (tv1->tv_sec - tv2->tv_sec) - 1; usec = tv1->tv_usec + million - tv2->tv_usec; sec += (usec / million); usec %= million; tv->tv_sec = sec; tv->tv_usec = usec; } #define INTERVAL_USEC 10000 static void check() { struct rusage ru; struct timeval rusage_time_cur; struct timeval rusage_time_diff; static struct timeval rusage_time_last; static long int count = 0; static long int bad = 0; static long int worse = 0; static long int worst = 0; if (0 != getrusage (0, &ru)) { fprintf (stderr, "getrusage failed\n"); exit(1); } timevalPlus(&ru.ru_utime, &ru.ru_stime, &rusage_time_cur); if (count > 0) { timevalMinus (&rusage_time_cur, &rusage_time_last, &rusage_time_diff); int show_status = 0; if (count % 500 == 0) show_status = 1; if (rusage_time_diff.tv_sec > 0) { worst++; show_status = 1; } else if (rusage_time_diff.tv_usec > 5 * INTERVAL_USEC) { worse++; } else if (rusage_time_diff.tv_usec > ((3 * INTERVAL_USEC) / 2)) { bad++; } if (show_status) { printf ("count = %li, bad = %li, worse = %li, worst = %li\n", count, bad, worse, worst); printf ("rusage_time_diff.tv_sec = %li, rusage_time_diff.tv_usec = %li\n\n", (long int)rusage_time_diff.tv_sec, (long int)rusage_time_diff.tv_usec); } } if (count >= 1000) { printf ("count = %li, bad = %li, worse = %li, worst = %li\n", count, bad, worse, worst); printf ("rusage_time_cur.tv_sec = %li, rusage_time_cur.tv_usec = %li\n", (long int)rusage_time_cur.tv_sec, (long int)rusage_time_cur.tv_usec); exit (0); } count++; rusage_time_last = rusage_time_cur; } static volatile int perform_check; static void action(int sig, siginfo_t *sip, void* z) { perform_check = 1; } int main(int argc, char* argv[]) { struct sigaction sa; struct itimerval iv; perform_check = 0; sigemptyset (&sa.sa_mask); sa.sa_flags = SA_RESTART | SA_SIGINFO; sa.sa_sigaction = (void (*)(int, siginfo_t*, void*))action; if (0 != sigaction (SIGPROF, &sa, NULL)) { fprintf (stderr, "sigaction failed\n"); exit(1); } iv.it_interval.tv_sec = 0; iv.it_interval.tv_usec = INTERVAL_USEC; iv.it_value.tv_sec = 0; iv.it_value.tv_usec = INTERVAL_USEC; if (0 != setitimer (ITIMER_PROF, &iv, NULL)) { fprintf (stderr, "setitimer failed\n"); exit(1); } /* Something to keep the processor busy. */ int num, cycleLen, n; num = 0; while (1) { if (perform_check) { check(); perform_check = 0; } cycleLen = 0; n = num; while (1) { if (perform_check) { check(); perform_check = 0; } if (n == 0 || n == 1) { break; } else if (n % 2 == 1) { n = 3 * n + 1; } else { n = n / 2; } cycleLen++; } if (cycleLen > 710) { printf ("num = %i, cycleLen = %i\n", num, cycleLen); } num++; } return 0; } /* end test.c */ Essentially, the 'check' function determines the elapsed CPU time between invocations, and records when things are "bad" (elapsed time is between 15000us and 50000us), "worse" (elapsed time is between 50000us and 1s), and "worst' (elapsed time greater than 1s). Running on my MacPro, I get output like the following: /* begin x86-darwin.4cpu.txt */ count = 46, bad = 0, worse = 0, worst = 1 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 879932 count = 84, bad = 0, worse = 0, worst = 2 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 729972 ... count = 610, bad = 0, worse = 14, worst = 17 rusage_time_diff.tv_sec = 14, rusage_time_diff.tv_usec = 189995 count = 614, bad = 0, worse = 14, worst = 18 rusage_time_diff.tv_sec = 64, rusage_time_diff.tv_usec = 809370 count = 772, bad = 0, worse = 14, worst = 19 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 169648 count = 779, bad = 0, worse = 14, worst = 20 rusage_time_diff.tv_sec = 6, rusage_time_diff.tv_usec = 379854 count = 851, bad = 0, worse = 14, worst = 21 rusage_time_diff.tv_sec = 124, rusage_time_diff.tv_usec = 199970 count = 856, bad = 0, worse = 14, worst = 22 rusage_time_diff.tv_sec = 5, rusage_time_diff.tv_usec = 319675 ... count = 994, bad = 0, worse = 18, worst = 28 rusage_time_diff.tv_sec = 2, rusage_time_diff.tv_usec = 209937 count = 1000, bad = 0, worse = 18, worst = 28 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 10000 count = 1000, bad = 0, worse = 18, worst = 28 rusage_time_cur.tv_sec = 363, rusage_time_cur.tv_usec = 911990 /* end x86-darwin.4cpu.txt */ which shows, for example, that the 610th signal was delivered 14s after then 609th, the 614th signal was delivered 64s after the 613th, and the 851st signal was delivered 124s after then 850th. The final lines show that program spent a total of 363s waiting for the delivery of 1000 signals. Using the CHUD tools, I disabled all but one of the cores on my MacPro; then I get output like the following: /* begin x86-darwin.1cpu.txt */ count = 500, bad = 1, worse = 0, worst = 0 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 9967 count = 1000, bad = 10, worse = 0, worst = 0 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 9984 count = 1000, bad = 10, worse = 0, worst = 0 rusage_time_cur.tv_sec = 10, rusage_time_cur.tv_usec = 31320 /* end x86-darwin.1cpu.txt */ which is much more reasonable -- only 10 signals were delivered slightly late and the program spent a total of 10s waiting for the delivery of 1000 signals. With 2 cores enabled, I get output like the following: /* begin x86-darwin.2cpu.txt */ count = 500, bad = 4, worse = 19, worst = 0 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 10000 count = 833, bad = 5, worse = 35, worst = 1 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 9974 count = 1000, bad = 6, worse = 45, worst = 1 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 269998 count = 1000, bad = 6, worse = 45, worst = 1 rusage_time_cur.tv_sec = 32, rusage_time_cur.tv_usec = 267562 /* end x86-darwin.2cpu.txt */ which is somewhat better than the 4cpu experiment, but the program is still spending a total of 32s waiting for the deliver of 1000 signals. The phenomenon is also present on PowerPC; on a dual-processor PowerPC MacPro, I get output like the following: /* begin powerpc-darwin.2cpu.txt */ count = 426, bad = 1, worse = 10, worst = 1 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 9804 count = 500, bad = 1, worse = 16, worst = 1 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 10000 count = 506, bad = 1, worse = 16, worst = 2 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 79375 count = 652, bad = 1, worse = 19, worst = 3 rusage_time_diff.tv_sec = 1, rusage_time_diff.tv_usec = 329479 count = 1000, bad = 1, worse = 32, worst = 3 rusage_time_diff.tv_sec = 0, rusage_time_diff.tv_usec = 9999 count = 1000, bad = 1, worse = 32, worst = 3 rusage_time_cur.tv_sec = 26, rusage_time_cur.tv_usec = 334582 /* end powerpc-darwin.2cpu.txt */ I also observe the late delivery of signals with ITIMER_VIRTUAL/SIGVTALRM, though (curiously) not with ITIMER_REAL/SIGALRM. Nonetheless, for my application, ITIMER_PROF/SIGPROF is really the appropriate sampling mechanism. On multi-cpu Linux machines (x86 and amd64), with the above program, I observe no late delivery of signals and the program spends a total of 11s waiting for the delivery of 10000 signals. While I suppose that setitimer doesn't provide a guarantee on the timely delivery of signals, 124s on a 10000us interval seems a bit excessive. Is this a known problem? Is there a workaround? Is there a better forum to raise this issue? Thanks, -Matthew _______________________________________________ Do not post admin requests to the list. They will be ignored. Darwin-dev mailing list (Darwin-dev@lists.apple.com) Help/Unsubscribe/Update your Subscription: http://lists.apple.com/mailman/options/darwin-dev/site_archiver%40lists.appl... This email sent to site_archiver@lists.apple.com
participants (1)
-
Matthew Fluet