rempas ревизій цього gist . До ревизії
1 file changed, 23 insertions, 13 deletions
stack_head_clone3_futex.c
@@ -92,6 +92,11 @@ struct __attribute((aligned(16))) stack_head { | |||
92 | 92 | ||
93 | 93 | typedef struct stack_head stack_head; | |
94 | 94 | ||
95 | + | typedef struct { /* Pair of pointers describing one thread stack; this is what newstack() returns */ | |
96 | + | void *tail; /* passed as the `stack` member of CloneArgs in _start */ | |
97 | + | stack_head *head; /* stack_head that _start fills with the entry point and thread data */ | |
98 | + | } thread_stack; | |
99 | + | ||
95 | 100 | __attribute((naked)) static long newthread(CloneArgs* args) { | |
96 | 101 | __asm volatile ( | |
97 | 102 | /* "args" is already in "rdi" */ | |
@@ -118,35 +123,40 @@ static void threadentry(stack_head *stack) { | |||
118 | 123 | exit(0); | |
119 | 124 | } | |
120 | 125 | ||
121 | - | static stack_head *newstack(long size) { | |
126 | + | static thread_stack newstack(long size) { /* Map `size` bytes for a thread stack; returns { lowest address, top stack_head slot }, or a null pair on failure */ | |
122 | 127 | unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0); | |
123 | - | if (p > -4096UL) { return 0; } | |
124 | 128 | ||
125 | - | long count = size / sizeof(stack_head); | |
126 | - | return (stack_head*)p + count - 1; | |
129 | + | if (p > -4096UL) { /* Return "null" on failure */ | |
130 | + | thread_stack stack = { (void*)0, (void*)0 }; | |
131 | + | return stack; | |
132 | + | } | |
133 | + | ||
134 | + | long count = size / sizeof(stack_head); /* the mapping is counted in stack_head-sized slots, not thread_stack-sized ones */ | |
135 | + | thread_stack stack = { (void*)p, (stack_head*)p + count - 1 }; /* tail = lowest mapped byte, head = last stack_head slot (a real pointer, not the slot count) */ | |
136 | + | return stack; | |
127 | 137 | } | |
128 | 138 | ||
129 | 139 | __attribute((force_align_arg_pointer)) void _start(void) { /* Program entry point: prepares one thread, starts it, waits on its futex, then exits */ | |
130 | - | stack_head *stack = newstack(1<<16); | |
131 | - | stack->entry = threadentry; | |
140 | + | thread_stack stack = newstack(1<<16); /* NOTE(review): newstack returns a null pair on mmap failure; that is not checked before stack.head is used */ | |
141 | + | stack.head->entry = threadentry; /* function the new thread is meant to run */ | |
132 | 142 | ||
133 | 143 | // Thread data read by threadentry | |
134 | - | stack->message = "hello world\n"; | |
135 | - | stack->message_length = 12; | |
136 | - | stack->print_count = 20; | |
144 | + | stack.head->message = "hello world\n"; | |
145 | + | stack.head->message_length = 12; /* byte length of the message above */ | |
146 | + | stack.head->print_count = 20; | |
137 | 147 | ||
138 | - | stack->join_futex = 0; // Set the "futex" | |
148 | + | stack.head->join_futex = 0; // Clear the "futex"; the value passed to futex_wait below is this same 0 | |
139 | 149 | ||
140 | 150 | CloneArgs args = { | |
141 | 151 | CLONE_VM, | |
142 | 152 | 0, 0, 0, SIGCHLD /* I have also tried using "0" here */, | |
143 | - | (__aligned_u64)&stack, /* stack */ | |
144 | - | 1 << 16, /* stack_size */ | |
153 | + | (__aligned_u64)stack.tail, /* stack */ | |
154 | + | (void *)stack.head - stack.tail, /* stack_size: byte distance from tail up to head */ | |
145 | 155 | 0, 0, 0, 0 | |
146 | 156 | }; | |
147 | 157 | ||
148 | 158 | newthread(&args); /* return value is ignored */ | |
149 | 159 | ||
150 | - | futex_wait(&stack->join_futex, 0); | |
160 | + | futex_wait(&stack.head->join_futex, 0); /* single wait with expected value 0; the futex is not re-checked in a loop */ | |
151 | 161 | exit_group(0); | |
152 | 162 | } |
rempas ревизій цього gist . До ревизії
Без змін
rempas ревизій цього gist . До ревизії
1 file changed, 152 insertions
stack_head_clone3_futex.c(файл створено)
@@ -0,0 +1,152 @@ | |||
1 | + | // libc-free x86-64 Linux multi-threading example | |
2 | + | // $ cc -nostdlib stack_head_clone3_futex.c | |
3 | + | // Ref: https://nullprogram.com/blog/2023/03/23/ | |
4 | + | // This is free and unencumbered software released into the public domain. | |
5 | + | ||
6 | + | #include "clone_args.h" | |
7 | + | ||
8 | + | #define SYS_write 1 /* Linux x86-64 system call numbers used by the wrappers below */ | |
9 | + | #define SYS_mmap 9 | |
10 | + | #define SYS_nanosleep 35 | |
11 | + | #define SYS_clone 56 /* not referenced by the code shown here; newthread issues syscall 435 directly */ | |
12 | + | #define SYS_exit 60 | |
13 | + | #define SYS_futex 202 | |
14 | + | #define SYS_exit_group 231 | |
15 | + | ||
16 | + | #define FUTEX_WAIT 0 /* futex operation codes, passed as the second SYS_futex argument */ | |
17 | + | #define FUTEX_WAKE 1 | |
18 | + | ||
19 | + | #define SYSCALL1(n, a) /* SYSCALLk: call syscall6 with k arguments, each cast to long; unused slots are 0 */ \ | |
20 | + | syscall6(n,(long)(a),0,0,0,0,0) | |
21 | + | #define SYSCALL2(n, a, b) \ | |
22 | + | syscall6(n,(long)(a),(long)(b),0,0,0,0) | |
23 | + | #define SYSCALL3(n, a, b, c) \ | |
24 | + | syscall6(n,(long)(a),(long)(b),(long)(c),0,0,0) | |
25 | + | #define SYSCALL4(n, a, b, c, d) \ | |
26 | + | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),0,0) | |
27 | + | #define SYSCALL5(n, a, b, c, d, e) \ | |
28 | + | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),0) | |
29 | + | #define SYSCALL6(n, a, b, c, d, e, f) /* the number n is passed through without a cast */ \ | |
30 | + | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),(long)(f)) | |
31 | + | ||
32 | + | static long syscall6(long n, long a, long b, long c, long d, long e, long f) { /* Issue system call number `n` with six arguments and return the raw value left in rax */ | |
33 | + | register long ret; | |
34 | + | register long r10 asm("r10") = d; /* arguments 4-6 are pinned to r10, r8 and r9 through explicit register variables */ | |
35 | + | register long r8 asm("r8") = e; | |
36 | + | register long r9 asm("r9") = f; | |
37 | + | __asm volatile ( | |
38 | + | "syscall" | |
39 | + | : "=a"(ret) /* output: result in rax */ | |
40 | + | : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9) /* rax = number, rdi/rsi/rdx = arguments 1-3 */ | |
41 | + | : "rcx", "r11", "memory" /* the syscall instruction overwrites rcx and r11; "memory" stops accesses being moved across the call */ | |
42 | + | ); | |
43 | + | return ret; /* no errno translation here; callers in this file test the raw value (r < 0, p > -4096UL) */ | |
44 | + | } | |
45 | + | ||
46 | + | static void millisleep(int ms) { /* Ask the kernel (SYS_nanosleep) to sleep for `ms` milliseconds; the syscall result is ignored */ | |
47 | + | long spec[2] = { ms / 1000, (ms % 1000) * 1000000L }; /* { whole seconds, remaining nanoseconds } */ | |
48 | + | syscall6(SYS_nanosleep, (long)(spec), (long)(spec), 0, 0, 0, 0); /* the same array is passed as both arguments */ | |
49 | + | } | |
50 | + | ||
51 | + | /* int num = 65; */ // Ignore this, used for my own personal test | |
52 | + | ||
53 | + | static long fullwrite(int fd, void *buf, long len) { /* Write all `len` bytes of buf to fd, continuing after short writes; returns len, or the negative syscall result on the first failure */ | |
54 | + | long done = 0; /* bytes written so far */ | |
55 | + | while (done < len) { | |
56 | + | long n = SYSCALL3(SYS_write, fd, (char *)buf + done, len - done); | |
57 | + | if (n < 0) { return n; } | |
58 | + | done += n; | |
59 | + | } | |
60 | + | return len; | |
61 | + | } | |
62 | + | ||
63 | + | __attribute((noreturn)) static void exit(int status) { /* Invoke SYS_exit with `status`; declared noreturn */ | |
64 | + | SYSCALL1(SYS_exit, status); | |
65 | + | __builtin_unreachable(); /* tells the compiler control never continues past the syscall, consistent with noreturn */ | |
66 | + | } | |
67 | + | ||
68 | + | __attribute((noreturn)) static void exit_group(int status) { /* Invoke SYS_exit_group with `status`; declared noreturn. _start uses this to end the program */ | |
69 | + | SYSCALL1(SYS_exit_group, status); | |
70 | + | __builtin_unreachable(); /* tells the compiler control never continues past the syscall, consistent with noreturn */ | |
71 | + | } | |
72 | + | ||
73 | + | static void futex_wait(int *futex, int expect) { /* FUTEX_WAIT on *futex with expected value `expect`; the syscall result is discarded */ | |
74 | + | SYSCALL4(SYS_futex, futex, FUTEX_WAIT, expect, 0); /* fourth argument 0: no timeout is supplied */ | |
75 | + | } | |
76 | + | ||
77 | + | static void futex_wake(int *futex) { /* FUTEX_WAKE on *futex; the syscall result is discarded */ | |
78 | + | SYSCALL3(SYS_futex, futex, FUTEX_WAKE, 0x7fffffff); /* 0x7fffffff (INT_MAX) is the wake count, i.e. no practical limit on the waiters woken */ | |
79 | + | } | |
80 | + | ||
81 | + | /* Per-thread record that newstack() places in the last slot of a stack mapping. The structure must have a 16-byte alignment on all architectures */ | |
82 | + | struct __attribute((aligned(16))) stack_head { | |
83 | + | void (*entry)(struct stack_head*); // Entry point. First member, so the "ret" in newthread can pop it when the stack pointer is at this struct; it receives a pointer to its own stack_head | |
84 | + | ||
85 | + | /* The remaining members can hold any per-thread data we want. | |
86 | + | The following is just an example, consumed by threadentry. */ | |
87 | + | char *message; /* text threadentry writes to fd 1 */ | |
88 | + | long message_length; /* number of bytes of message to write */ | |
89 | + | int print_count; /* how many times threadentry writes the message */ | |
90 | + | int join_futex; // Join "futex": cleared by _start, then set to 1 and woken by threadentry when the thread is done | |
91 | + | }; | |
92 | + | ||
93 | + | typedef struct stack_head stack_head; | |
94 | + | ||
95 | + | __attribute((naked)) static long newthread(CloneArgs* args) { /* Issue syscall 435 with (args, 88); the caller gets the raw result in rax. naked: no compiler prologue/epilogue, so the asm supplies its own "ret" */ | |
96 | + | __asm volatile ( | |
97 | + | /* "args" is already in "rdi" (the first argument register) */ | |
98 | + | "mov $88, %%esi\n" // arg2 = size (always "88" until further notice); presumably sizeof(CloneArgs) - confirm against clone_args.h | |
99 | + | "mov $435, %%eax\n" // syscall number 435 (clone3 on x86-64), not the SYS_clone (56) define | |
100 | + | "syscall\n" | |
101 | + | "mov %%rsp, %%rdi\n" // entry point argument: the current stack pointer becomes the first argument | |
102 | + | "ret\n" /* pops the address at the top of the current stack and jumps to it */ | |
103 | + | : : : "rax", "rcx", "rsi", "rdi", "r11", "memory" /* NOTE(review): GCC documents only basic asm as safe inside naked functions - confirm this extended asm is accepted by the compilers in use */ | |
104 | + | ); | |
105 | + | } | |
106 | + | ||
107 | + | static void threadentry(stack_head *stack) { /* Thread body: write the message print_count times with a 25 ms pause after each write, then set and wake join_futex and exit */ | |
108 | + | int *done_flag = &stack->join_futex; | |
109 | + | char *text = stack->message; | |
110 | + | int text_len = stack->message_length; | |
111 | + | for (int left = stack->print_count; left > 0; left--) { | |
112 | + | fullwrite(1, text, text_len); /* fd 1; the result is ignored */ | |
113 | + | millisleep(25); | |
114 | + | } | |
115 | + | ||
116 | + | __atomic_store_n(done_flag, 1, __ATOMIC_SEQ_CST); /* publish completion before waking any waiter */ | |
117 | + | futex_wake(done_flag); | |
118 | + | exit(0); | |
119 | + | } | |
120 | + | ||
121 | + | static stack_head *newstack(long size) { /* mmap `size` bytes and return a pointer to the last stack_head-sized slot of the mapping, or 0 if mmap failed */ | |
122 | + | unsigned long base = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0); /* prot 3 = read|write, flags 0x22 = private|anonymous, fd -1, offset 0 */ | |
123 | + | if (base > -4096UL) { return 0; } /* results in the top 4095 values are error codes, not addresses */ | |
124 | + | stack_head *slots = (stack_head *)base; | |
125 | + | long slot_count = size / sizeof(stack_head); | |
126 | + | return &slots[slot_count - 1]; | |
127 | + | } | |
128 | + | ||
129 | + | __attribute((force_align_arg_pointer)) void _start(void) { /* Program entry point (no libc): start one thread on a freshly mapped stack, wait until it signals completion, then exit. Exits with status 1 if the stack or the thread cannot be created */ | |
130 | + | stack_head *stack = newstack(1<<16); | |
131 | + | if (!stack) { exit_group(1); } /* mmap failed: there is no stack_head to fill in */ | |
132 | + | stack->entry = threadentry; | |
133 | + | // Thread data | |
134 | + | stack->message = "hello world\n"; | |
135 | + | stack->message_length = 12; | |
136 | + | stack->print_count = 20; | |
137 | + | ||
138 | + | stack->join_futex = 0; // Set the "futex" | |
139 | + | ||
140 | + | CloneArgs args = { | |
141 | + | CLONE_VM, | |
142 | + | 0, 0, 0, SIGCHLD /* I have also tried using "0" here */, | |
143 | + | (__aligned_u64)stack + sizeof(*stack) - (1 << 16), /* stack: lowest address of the mapping (clone3 expects the base of the area, not the address of the local pointer variable) */ | |
144 | + | (1 << 16) - sizeof(*stack), /* stack_size: base + stack_size == stack, so the child starts with rsp at its stack_head */ | |
145 | + | 0, 0, 0, 0 | |
146 | + | }; | |
147 | + | ||
148 | + | if (newthread(&args) < 0) { exit_group(1); } /* no thread was created, so nothing would ever wake the futex */ | |
149 | + | ||
150 | + | while (!__atomic_load_n(&stack->join_futex, __ATOMIC_SEQ_CST)) { futex_wait(&stack->join_futex, 0); } /* re-check the flag after every return from the wait */ | |
151 | + | exit_group(0); | |
152 | + | } |