1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
|
/*
* Copyright (c) 2018, Cornell University
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* Neither the name of Cornell University nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Tuan Ta
*/
//------------------------------------------------------------------------
// This test_macros includes necessary functions and macros to create
// and exit threads. They're used in multi-threaded assembly tests.
// This assumes the target system can concurrently support 4 different
// threads (i.e., 1 master thread and 3 child threads).
//
// Threads are synchronized through futex system call (i.e., wait and
// wakeup operations).
//------------------------------------------------------------------------
#ifndef __TEST_MACROS_MT_FUTEX_H
#define __TEST_MACROS_MT_FUTEX_H
// System call numbers. The routines in this file load one of these into a7
// before executing ecall. SYSCALL_GETTID is not used by the routines in
// this file.
#define SYSCALL_FUTEX 98
#define SYSCALL_GETTID 178
#define SYSCALL_MUNMAP 215
#define SYSCALL_CLONE 220
#define SYSCALL_MMAP 222

// Size in bytes (4 MiB) of every region mapped by _alloc_mem and unmapped by
// _dealloc_mem; one such region is used per child stack and per child TLS.
#define MEM_SIZE (4096 * 1024)

// mmap protection bits and the combination passed by _alloc_mem.
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define MMAP_PROT_FLAGS (PROT_READ | PROT_WRITE)

// mmap mapping flags and the combination passed by _alloc_mem.
#define MAP_PRIVATE 0x02
#define MAP_ANONYMOUS 0x20
#define MAP_STACK 0x20000
#define MMAP_MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK)

// clone flag bits.
#define CLONE_VM 0x00000100
#define CLONE_FS 0x00000200
#define CLONE_FILES 0x00000400
#define CLONE_SIGHAND 0x00000800
#define CLONE_PARENT 0x00008000
#define CLONE_THREAD 0x00010000
#define CLONE_IO 0x80000000
#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
#define CLONE_SETTLS 0x00080000

// Flag combination passed by _create_threads to the clone system call.
#define CLONE_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES \
| CLONE_SIGHAND | CLONE_PARENT \
| CLONE_THREAD | CLONE_IO \
| CLONE_PARENT_SETTID \
| CLONE_CHILD_CLEARTID \
| CLONE_SETTLS)

// futex commands.
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
#define FUTEX_CMP_REQUEUE 4
#define FUTEX_WAKE_OP 5
#define FUTEX_WAIT_BITSET 9
#define FUTEX_WAKE_BITSET 10

// futex command modifier bits, and the mask that strips them from a command.
// The mask is fully parenthesized so that it expands as a single operand.
#define FUTEX_PRIVATE_FLAG 128
#define FUTEX_CLOCK_REALTIME 256
#define FUTEX_CMD_MASK (~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME))

// Operations encoded into a FUTEX_OP() word.
#define FUTEX_OP_SET 0 /* uaddr2 = oparg; */
#define FUTEX_OP_ADD 1 /* uaddr2 += oparg; */
#define FUTEX_OP_OR 2 /* uaddr2 |= oparg; */
#define FUTEX_OP_ANDN 3 /* uaddr2 &= ~oparg; */
#define FUTEX_OP_XOR 4 /* uaddr2 ^= oparg; */
#define FUTEX_OP_ARG_SHIFT 8 /* Use (1 << oparg) as operand */

// Comparisons encoded into a FUTEX_OP() word.
#define FUTEX_OP_CMP_EQ 0 /* if (oldval == cmparg) wake */
#define FUTEX_OP_CMP_NE 1 /* if (oldval != cmparg) wake */
#define FUTEX_OP_CMP_LT 2 /* if (oldval < cmparg) wake */
#define FUTEX_OP_CMP_LE 3 /* if (oldval <= cmparg) wake */
#define FUTEX_OP_CMP_GT 4 /* if (oldval > cmparg) wake */
#define FUTEX_OP_CMP_GE 5 /* if (oldval >= cmparg) wake */

// FUTEX_OP: pack an operation word.
//   bits 31..28 = op, bits 27..24 = cmp, bits 23..12 = oparg,
//   bits 11..0  = cmparg
// Every argument is parenthesized before it is masked so that compound
// arguments (e.g. "A | B") are masked as a whole instead of being split by
// operator precedence.
#define FUTEX_OP(op, oparg, cmp, cmparg) \
((((op) & 0xf) << 28) | \
(((cmp) & 0xf) << 24) | \
(((oparg) & 0xfff) << 12) | \
((cmparg) & 0xfff))

// futex commands combined with FUTEX_PRIVATE_FLAG.
#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
#define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG)

// Result codes.
#define FAILURE 1
#define SUCCESS 0
//------------------------------------------------------------------------
// _create_threads: create a given number of threads
//
// The calling thread (a.k.a, master thread) saves information about its
// child threads in its stack in the following structure:
//
// | child_stack_ptr_0 | << fp: frame pointer
// | child_tls_ptr_0 |
// | child_thread_id_0 |
// | saved_child_thread_id_0 |
// | child_stack_ptr_1 |
// | child_tls_ptr_1 |
// | child_thread_id_1 |
// | saved_child_thread_id_1 |
// | ... | << sp: stack pointer
//
// For each child thread, we need to save the following information
// in the parent thread's stack frame:
//
// - child_stack_ptr stores the lower address of the child thread's
// stack space
//
// - child_tls_ptr stores the lower address of the child thread's
// thread local storage (TLS)
//
// - child_thread_id stores the thread ID of the child thread. This
// variable will be cleared by the child thread when it exits.
//
// - saved_child_thread_id also stores the thread ID of the child
// thread, but this variable is used only by the parent thread.
//
// This function takes the number of threads to create in a0. It adds
// one to the n_worker_threads variable for each successfully created
// thread.
//------------------------------------------------------------------------
_create_threads:
    // Register roles inside this routine:
    //   t0 = number of threads still to create
    //   t1 = low address of the current child's stack region
    //   t2 = low address of the current child's TLS region
    //   t3 = address of n_worker_threads
    //   t4 = constant 1 (increment for n_worker_threads)
    //   s0 = saved return address (ra is overwritten by the nested jal)
    //   s2 = MEM_SIZE
    // t0-t3 are live across the ecall instructions below and across the
    // calls to _alloc_mem/_dealloc_mem, so this routine relies on system
    // calls preserving them.
    // NOTE(review): s0 and s2 are overwritten without being saved; callers
    // must not expect them to be preserved.
    mv      t0, a0                  // get the number of threads
    mv      s0, ra                  // save return register
    la      t3, n_worker_threads
    blez    t0, 3f                  // nothing to create: return immediately
1:
    // allocate a new stack space and save its pointer in the caller's stack
    jal     ra, _alloc_mem
    bltz    a0, 3f                  // mmap failed; nothing was pushed for
                                    // this thread, so just stop creating
    addi    sp, sp, -8
    sd      a0, (sp)                // child_stack_ptr
    mv      t1, a0

    // allocate a new thread local storage (TLS) and save its pointer in the
    // caller's stack
    jal     ra, _alloc_mem
    bltz    a0, 4f                  // mmap failed; only the stack region
                                    // has to be released
    addi    sp, sp, -8
    sd      a0, (sp)                // child_tls_ptr
    mv      t2, a0

    // allocate space in the caller's stack to store new thread ID
    addi    sp, sp, -8              // child_thread_id

    // clone a new thread
    li      a0, CLONE_FLAGS
    li      s2, MEM_SIZE
    add     a1, t1, s2              // pointer to the high address of the
                                    // new stack
    mv      a2, sp                  // ptid
    mv      a3, t2                  // pointer to the low address of the new
                                    // TLS, assuming TLS grows upward
    mv      a4, sp                  // ctid (same slot as ptid)
    li      a7, SYSCALL_CLONE       // clone syscall number
    ecall                           // call clone syscall
    bltz    a0, 2f                  // syscall error
    beqz    a0, _mt_test            // only the new thread jumps to _mt_test

    // save child thread ID in the caller's stack
    addi    sp, sp, -8
    sd      a0, (sp)                // saved_child_thread_id

    // decrement the number of threads to create
    addi    t0, t0, -1

    // increment the number of successfully created threads so far
    li      t4, 1
    amoadd.d zero, t4, (t3)

    // check if we still need to spawn more threads
    bnez    t0, 1b
    j       3f
2:
    // handle clone syscall error by deleting the last memory frame created
    // for the unsuccessfully spawned thread.
    addi    sp, sp, 8               // skip child_thread_id

    // deallocate last allocated tls
    ld      a0, (sp)
    jal     ra, _dealloc_mem
    addi    sp, sp, 8
4:
    // deallocate last allocated stack (also reached directly when the TLS
    // allocation failed; sp points at child_stack_ptr here)
    ld      a0, (sp)
    jal     ra, _dealloc_mem
    addi    sp, sp, 8
3:
    // finish creating threads
    mv      ra, s0
    ret
//------------------------------------------------------------------------
// _alloc_mem: allocate a memory space with size MEM_SIZE
//
// This function returns the pointer to the newly allocated memory
// space in a0
//------------------------------------------------------------------------
_alloc_mem:
    // Issue the mmap system call with arguments
    //   (addr = 0, length = MEM_SIZE, prot = MMAP_PROT_FLAGS,
    //    flags = MMAP_MAP_FLAGS, fd = -1, offset = 0).
    // The system call's result is left in a0 and returned unmodified, so
    // callers that care about failure must inspect a0 themselves.
    // Overwrites a0-a5 and a7.
    li      a7, SYSCALL_MMAP        // system call number
    li      a5, 0                   // offset
    li      a4, -1                  // fd: none
    li      a3, MMAP_MAP_FLAGS      // mapping flags
    li      a2, MMAP_PROT_FLAGS     // protection flags
    li      a1, MEM_SIZE            // length of the mapping
    li      a0, 0                   // no address requested
    ecall
    ret
//------------------------------------------------------------------------
// _delete_threads: deallocate all child threads
//
// This function assumes the following structure in the calling thread's
// stack frame
//
// | child_stack_ptr_0 | << fp: frame pointer
// | child_tls_ptr_0 |
// | child_thread_id_0 |
// | saved_child_thread_id_0 |
// | child_stack_ptr_1 |
// | child_tls_ptr_1 |
// | child_thread_id_1 |
// | saved_child_thread_id_1 |
// | ... | << sp: stack pointer
//
// This function takes the number of threads to delete in a0
//------------------------------------------------------------------------
_delete_threads:
    // Pops one 32-byte frame (saved_child_thread_id, child_thread_id,
    // child_tls_ptr, child_stack_ptr) per thread from the caller's stack and
    // unmaps the TLS and stack regions recorded in it. On return sp has been
    // advanced past every frame that was released.
    //   t0 = number of frames still to release
    //   s0 = saved return address (ra is overwritten by the nested jal)
    // t0 is live across the calls to _dealloc_mem, so this routine relies on
    // the system call preserving it.
    mv      t0, a0                  // get the number of threads to delete
    mv      s0, ra                  // save return register
    blez    t0, 2f                  // nothing to delete: leave sp untouched
1:
    addi    sp, sp, 16              // skip saved_child_thread_id and
                                    // child_thread_id

    // deallocate thread's tls
    ld      a0, (sp)
    jal     ra, _dealloc_mem
    addi    sp, sp, 8

    // deallocate thread's stack
    ld      a0, (sp)
    jal     ra, _dealloc_mem
    addi    sp, sp, 8

    // decrement the number of threads to delete
    addi    t0, t0, -1
    bnez    t0, 1b
2:
    // finish deleting all threads
    mv      ra, s0                  // restore return register
    ret
//------------------------------------------------------------------------
// _dealloc_mem: deallocate memory space of size MEM_SIZE
//
// This function takes the pointer to the memory space in a0
//------------------------------------------------------------------------
_dealloc_mem:
    // Issue the munmap system call on the region whose address the caller
    // passed in a0, using MEM_SIZE as its length. The system call's result
    // is left in a0 and is not checked here.
    // Overwrites a1 and a7.
    li      a7, SYSCALL_MUNMAP      // system call number
    li      a1, MEM_SIZE            // length of the region to unmap
    ecall
    ret
//------------------------------------------------------------------------
// _join: wait for all child threads to exit
//
// Child threads are created with CLONE_CHILD_CLEARTID flag, so when
// they exit, they will clear the ctid/ptid variable and wake up their
// parent thread.
//
// This function assumes the following structure in the calling thread's
// stack frame
//
// | child_stack_ptr_0 | << fp: frame pointer
// | child_tls_ptr_0 |
// | child_thread_id_0 |
// | saved_child_thread_id_0 |
// | child_stack_ptr_1 |
// | child_tls_ptr_1 |
// | child_thread_id_1 |
// | saved_child_thread_id_1 |
// | ... | << sp: stack pointer
//
// This function takes a number of threads to wait in a0
//------------------------------------------------------------------------
_join:
    // Walks the per-thread frames on the caller's stack, starting at sp, and
    // issues one futex wait per frame on its child_thread_id slot. Each
    // frame is four 8-byte slots laid out from low to high address as
    //   sp +  0: saved_child_thread_id
    //   sp +  8: child_thread_id        (futex word, ctid/ptid)
    //   sp + 16: child_tls_ptr
    //   sp + 24: child_stack_ptr
    // so the walk must advance by 32 bytes per thread.
    //   t0 = number of threads still to wait for
    //   s0 = saved return address
    //   s1 = saved stack pointer (sp is restored before returning)
    // NOTE(review): s0 and s1 are overwritten without being saved.
    mv      t0, a0                  // get the number of threads
    mv      s0, ra                  // save return register
    mv      s1, sp                  // save stack pointer
    blez    t0, 2f                  // nothing to wait for
1:
    // Calling futex_wait on ctidptr
    ld      a2, (sp)                // expected value: child thread ID from
                                    // saved_child_thread_id
    addi    sp, sp, 8
    mv      a0, sp                  // futex address (child_thread_id)
    li      a1, FUTEX_WAIT_PRIVATE
    li      a3, 0                   // timeout = NULL (wait without a time
                                    // limit); previously a3 held whatever
                                    // the caller left in it
    li      a7, SYSCALL_FUTEX
    // NOTE(review): the wait is issued once; its return value is not
    // checked and child_thread_id is not re-read afterwards.
    // NOTE(review): this waits with FUTEX_PRIVATE_FLAG set; confirm that the
    // wake issued for CLONE_CHILD_CLEARTID on the target platform matches a
    // private waiter.
    ecall

    // advance to the next frame: skip child_thread_id, child_tls_ptr and
    // child_stack_ptr (the original code skipped only two of these three
    // slots, so later iterations read the wrong slots)
    addi    sp, sp, 24

    // decrement the number of threads to wait for
    addi    t0, t0, -1
    bnez    t0, 1b
2:
    // finish waiting for all threads
    mv      ra, s0                  // restore return register
    mv      sp, s1                  // restore stack pointer
    ret
// MT_DATA: data definitions for the multi-threaded tests.
//   n_worker_threads - counter incremented by _create_threads for every
//                      successfully created thread
//   shared_var, barrier, array - not referenced by the routines in this
//                      file; presumably used by the individual tests
// The last line of the macro intentionally has no trailing backslash, so the
// definition ends there and cannot absorb the line that follows it.
#define MT_DATA \
    n_worker_threads: .dword 0; \
    shared_var: .dword 0; \
    barrier: .dword 0; \
    array: .dword 0x00000000deadbeef, \
                  0xdeadbeefdeadbeef, \
                  0x12343eeaaf423451;
#endif
|