1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "benchmark.h"
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>
/*
 * Measures the cost of sem_getvalue() on a semaphore that no other thread
 * touches.
 *
 * iters: number of sem_getvalue() calls made while the benchmark clock runs.
 */
static void BM_semaphore_sem_getvalue(int iters) {
  StopBenchmarkTiming();
  sem_t semaphore;
  sem_init(&semaphore, 1, 1);
  StartBenchmarkTiming();

  for (int i = 0; i < iters; ++i) {
    int dummy;  // value is read back but deliberately ignored
    sem_getvalue(&semaphore, &dummy);
  }

  StopBenchmarkTiming();
  // Release the semaphore outside the timed region; sem_init() must be
  // paired with sem_destroy().
  sem_destroy(&semaphore);
}
BENCHMARK(BM_semaphore_sem_getvalue);
/*
 * Measures the cost of an uncontended sem_wait()/sem_post() pair.
 *
 * The semaphore starts at 1, so each sem_wait() succeeds immediately and the
 * following sem_post() restores the count for the next iteration.
 *
 * iters: number of sem_wait()/sem_post() pairs executed while the benchmark
 *        clock runs.
 */
static void BM_semaphore_sem_wait_sem_post(int iters) {
  StopBenchmarkTiming();
  sem_t semaphore;
  sem_init(&semaphore, 1, 1);
  StartBenchmarkTiming();

  for (int i = 0; i < iters; ++i) {
    sem_wait(&semaphore);
    sem_post(&semaphore);
  }

  StopBenchmarkTiming();
  // Release the semaphore outside the timed region; sem_init() must be
  // paired with sem_destroy().
  sem_destroy(&semaphore);
}
BENCHMARK(BM_semaphore_sem_wait_sem_post);
/*
* This test reports the overhead of the underlying futex wake syscall on
* the producer. It does not report the overhead from issuing the wake to the
* point where the posted consumer thread wakes up. It suffers from
* clock_gettime syscall overhead. Lock the CPU speed for consistent results
* as we may not reach >50% cpu utilization.
*
* We will run a background thread that catches the sem_post wakeup and
* loops immediately returning back to sleep in sem_wait for the next one. This
* thread is run with policy SCHED_OTHER (normal policy), a middle policy.
*
* The primary thread will run at SCHED_IDLE (lowest priority policy) when
* monitoring the background thread to detect when it hits sem_wait sleep. It
* will do so with no clock running. Once we are ready, we will switch to
* SCHED_FIFO (highest priority policy) to time the act of running sem_post
* with the benchmark clock running. This ensures nothing else in the system
* can preempt our timed activity, including the background thread. We are
* also protected with the scheduling policy of letting a process hit a
* resource limit rather than get hit with a context switch.
*
* The background thread will start executing either on another CPU, or
* after we back down from SCHED_FIFO, but certainly not in the context of
* the timing of the sem_post.
*/
/*
 * Run-state flag shared between a benchmark's main thread and its background
 * thread(s):
 *  - the main thread stores 1 before creating the background thread(s), which
 *    keep looping while the value is positive;
 *  - the main thread stores 0 to ask the background thread(s) to stop;
 *  - the BM_semaphore_sem_post background thread stores -1 when it leaves its
 *    loop, and the main thread treats a negative value as "thread is gone";
 *  - each BM_semaphore_sem_post_sem_wait background thread decrements the
 *    value once when it leaves its loop, and the main thread waits for it to
 *    reach (1 - num_semaphore).
 */
static atomic_int BM_semaphore_sem_post_running;
/*
 * Background thread body for BM_semaphore_sem_post.
 *
 * obj points at the sem_t to wait on. The thread keeps returning to
 * sem_wait() for as long as BM_semaphore_sem_post_running is positive and
 * sem_wait() succeeds; once either condition fails it stores -1 in the flag
 * to announce that it has left the loop.
 *
 * Always returns NULL.
 */
static void *BM_semaphore_sem_post_start_thread(void *obj) {
  sem_t *sem = reinterpret_cast<sem_t *>(obj);

  for (;;) {
    if (BM_semaphore_sem_post_running <= 0) {
      break;  // the main thread asked us to stop
    }
    if (sem_wait(sem) != 0) {
      break;  // sem_wait() reported an error
    }
  }

  BM_semaphore_sem_post_running = -1;
  return NULL;
}
static void BM_semaphore_sem_post(int iters) {
StopBenchmarkTiming();
sem_t semaphore;
sem_init(&semaphore, 0, 0);
pthread_attr_t attr;
pthread_attr_init(&attr);
BM_semaphore_sem_post_running = 1;
struct sched_param param = { 0, };
pthread_attr_setschedparam(&attr, ¶m);
pthread_attr_setschedpolicy(&attr, SCHED_OTHER);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
#ifdef PTHREAD_SET_INHERIT_SCHED
pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
#endif
pthread_t pthread;
pthread_create(&pthread, &attr, BM_semaphore_sem_post_start_thread, &semaphore);
pthread_attr_destroy(&attr);
sched_setscheduler((pid_t)0, SCHED_IDLE, ¶m);
for (int i = 0; i < iters; ++i) {
int trys = 3, dummy = 0;
do {
if (BM_semaphore_sem_post_running < 0) {
sched_setscheduler((pid_t)0, SCHED_OTHER, ¶m);
fprintf(stderr, "BM_semaphore_sem_post: start_thread died unexpectedly\n");
return;
}
sched_yield();
sem_getvalue(&semaphore, &dummy);
if (dummy < 0) { // POSIX.1-2001 possibility 1
break;
}
if (dummy == 0) { // POSIX.1-2001 possibility 2
--trys;
}
} while (trys);
param.sched_priority = 1;
sched_setscheduler((pid_t)0, SCHED_FIFO, ¶m);
StartBenchmarkTiming();
sem_post(&semaphore);
StopBenchmarkTiming(); // Remember to subtract clock syscall overhead
param.sched_priority = 0;
sched_setscheduler((pid_t)0, SCHED_IDLE, ¶m);
}
sched_setscheduler((pid_t)0, SCHED_OTHER, ¶m);
if (BM_semaphore_sem_post_running > 0) {
BM_semaphore_sem_post_running = 0;
}
do {
sem_post(&semaphore);
sched_yield();
} while (!BM_semaphore_sem_post_running);
}
BENCHMARK(BM_semaphore_sem_post);
/*
* This test reports the overhead of sem_post to sem_wake. A circle of
* num_semaphore - 1 threads are run on a set of semaphores to measure the
* activity. One can calculate the sem_wake overhead alone by:
*
* BM_semaphore_sem_post_sem_wait - BM_semaphore_sem_post - BM_time_clock_gettime
*
* Results will differ if there are more threads than active processors,
* because scheduling the extra threads induces delay. This cost is
* measured by trying different values of num_semaphore. The governor selected
* will have a major impact on the results for a large number of threads.
*
* To reduce the chances for threads racing ahead and not triggering the
* futex, for example the background threads finish their job before the
* sem_wait is hit in the main thread, the background threads will run at
* batch priority and the main thread at fifo priority. This should generally
* guarantee the main thread completes its task of priming itself with the
* sem_wait before the other threads can start. In practice this works on
* Android-configured kernels even without the sched mechanics here; they are
* insurance for wacky(tm) sched configurations.
*/
/*
 * Background thread body for BM_semaphore_sem_post_sem_wait_num.
 *
 * obj points at this thread's sem_t inside an array of semaphores. Each time
 * a wait on that semaphore succeeds, the thread posts the next semaphore in
 * the array. It stops when BM_semaphore_sem_post_running is no longer
 * positive or sem_wait() fails, and then decrements the flag once.
 *
 * Always returns NULL.
 */
static void *BM_semaphore_sem_post_sem_wait_start_thread(void *obj) {
  sem_t *mine = reinterpret_cast<sem_t *>(obj);
  sem_t *next = mine + 1;

  for (;;) {
    if (BM_semaphore_sem_post_running <= 0) {
      break;  // the main thread asked us to stop
    }
    if (sem_wait(mine) != 0) {
      break;  // sem_wait() reported an error
    }
    sem_post(next);
  }

  --BM_semaphore_sem_post_running;
  return NULL;
}
/*
 * Measures a sem_post() -> sem_wait() round trip through a chain of
 * semaphores.
 *
 * num_semaphore semaphores are initialised to 0 and (num_semaphore - 1)
 * detached SCHED_BATCH threads are started; thread i waits on semaphore[i]
 * and posts semaphore[i + 1]. The main thread, running at SCHED_FIFO, posts
 * semaphore[0] and waits on the last semaphore, so one lap passes through
 * every semaphore once.
 *
 * iters:         iteration budget; the timed loop advances by num_semaphore
 *                per lap.
 * num_semaphore: number of semaphores in the chain.
 */
static void BM_semaphore_sem_post_sem_wait_num(int iters, int num_semaphore) {
  StopBenchmarkTiming();

  sem_t semaphore[num_semaphore];
  for (int i = 0; i < num_semaphore; ++i) {
    sem_init(semaphore + i, 0, 0);
  }

  pthread_attr_t attr;
  pthread_attr_init(&attr);
  BM_semaphore_sem_post_running = 1;
  struct sched_param param = { 0, };
  pthread_attr_setschedparam(&attr, &param);
  pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
#ifdef PTHREAD_SET_INHERIT_SCHED
  pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
#endif
  for (int i = 0; i < (num_semaphore - 1); ++i) {
    pthread_t pthread;
    pthread_create(&pthread, &attr, BM_semaphore_sem_post_sem_wait_start_thread, semaphore + i);
  }
  pthread_attr_destroy(&attr);
  sched_yield();

  // Timed section: run at SCHED_FIFO while driving laps around the chain.
  param.sched_priority = 1;
  sched_setscheduler((pid_t)0, SCHED_FIFO, &param);
  StartBenchmarkTiming();
  for (int i = 0; i < iters; i += num_semaphore) {
    sem_post(semaphore);
    sem_wait(semaphore + num_semaphore - 1);
  }
  StopBenchmarkTiming();
  param.sched_priority = 0;
  sched_setscheduler((pid_t)0, SCHED_OTHER, &param);

  // Teardown: request a stop, then keep waking every background thread until
  // all (num_semaphore - 1) of them have decremented the flag, or until the
  // retry budget of 10 * num_semaphore rounds is used up.
  if (BM_semaphore_sem_post_running > 0) {
    BM_semaphore_sem_post_running = 0;
  }
  for (int i = 0;
       (i < (10 * num_semaphore)) && (BM_semaphore_sem_post_running > (1 - num_semaphore));
       ++i) {
    for (int j = 0; j < (num_semaphore - 1); ++j) {
      sem_post(semaphore + j);
    }
    sched_yield();
  }
  // NOTE(review): the semaphores are not destroyed here because the teardown
  // loop is bounded and may give up while a detached thread still references
  // the array - confirm whether that case can occur in practice.
}
/*
 * Runs the sem_post/sem_wait chain benchmark with 2 semaphores, i.e. a
 * single background thread.
 */
static void BM_semaphore_sem_post_sem_wait_low(int iters) {
  const int semaphore_count = 2;
  BM_semaphore_sem_post_sem_wait_num(iters, semaphore_count);
}
BENCHMARK(BM_semaphore_sem_post_sem_wait_low);
/*
 * Runs the sem_post/sem_wait chain benchmark with 100 semaphores, i.e. 99
 * background threads.
 */
static void BM_semaphore_sem_post_sem_wait_high(int iters) {
  const int semaphore_count = 100;
  BM_semaphore_sem_post_sem_wait_num(iters, semaphore_count);
}
BENCHMARK(BM_semaphore_sem_post_sem_wait_high);
|