Last active 1699737790

Stack head with clone3 (futex version)

rempas revised this gist 1699737790. Go to revision

1 file changed, 23 insertions, 13 deletions

stack_head_clone3_futex.c

@@ -92,6 +92,11 @@ struct __attribute((aligned(16))) stack_head {
92 92
93 93 typedef struct stack_head stack_head;
94 94
95 + typedef struct {
96 + void *tail;
97 + stack_head *head;
98 + } thread_stack;
99 +
95 100 __attribute((naked)) static long newthread(CloneArgs* args) {
96 101 __asm volatile (
97 102 /* "args" is already in "rdi" */
@@ -118,35 +123,40 @@ static void threadentry(stack_head *stack) {
118 123 exit(0);
119 124 }
120 125
121 - static stack_head *newstack(long size) {
126 + static thread_stack newstack(long size) {
122 127 unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0);
123 - if (p > -4096UL) { return 0; }
124 128
125 - long count = size / sizeof(stack_head);
126 - return (stack_head*)p + count - 1;
129 + if (p > -4096UL) { /* Return "null" on failure */
130 + thread_stack stack = { (void*)0, (void*)0 };
131 + return stack;
132 + }
133 +
134 + long count = size / sizeof(thread_stack);
135 + thread_stack stack = { (void*)(p + count - 1), (void*)count };
136 + return stack;
127 137 }
128 138
129 139 __attribute((force_align_arg_pointer)) void _start(void) {
130 - stack_head *stack = newstack(1<<16);
131 - stack->entry = threadentry;
140 + thread_stack stack = newstack(1<<16);
141 + stack.head->entry = threadentry;
132 142
133 143 // Thread data
134 - stack->message = "hello world\n";
135 - stack->message_length = 12;
136 - stack->print_count = 20;
144 + stack.head->message = "hello world\n";
145 + stack.head->message_length = 12;
146 + stack.head->print_count = 20;
137 147
138 - stack->join_futex = 0; // Set the "futex"
148 + stack.head->join_futex = 0; // Set the "futex"
139 149
140 150 CloneArgs args = {
141 151 CLONE_VM,
142 152 0, 0, 0, SIGCHLD /* I have also tried using "0" here */,
143 - (__aligned_u64)&stack, /* stack */
144 - 1 << 16, /* stack_size */
153 + (__aligned_u64)stack.tail, /* stack */
154 + (void *)stack.head - stack.tail, /* stack_size */
145 155 0, 0, 0, 0
146 156 };
147 157
148 158 newthread(&args);
149 159
150 - futex_wait(&stack->join_futex, 0);
160 + futex_wait(&stack.head->join_futex, 0);
151 161 exit_group(0);
152 162 }

rempas revised this gist 1698841337. Go to revision

No changes

rempas revised this gist 1698841313. Go to revision

1 file changed, 152 insertions

stack_head_clone3_futex.c(file created)

@@ -0,0 +1,152 @@
1 + // libc-free x86-64 Linux multi-threading example
2 + // $ cc -nostdlib stack_head_clone3_futex.c
3 + // Ref: https://nullprogram.com/blog/2023/03/23/
4 + // This is free and unencumbered software released into the public domain.
5 +
6 + #include "clone_args.h"
7 +
/* x86-64 Linux system call numbers used in this file */
#define SYS_write       1
#define SYS_mmap        9
#define SYS_nanosleep   35
#define SYS_clone       56
#define SYS_exit        60
#define SYS_futex       202
#define SYS_exit_group  231

/* Operation codes passed as the second argument of SYS_futex */
#define FUTEX_WAIT  0
#define FUTEX_WAKE  1

/* SYSCALLn(n, ...): call syscall6() with n real arguments. Each argument
   is cast to long and the unused trailing slots are passed as 0. */
#define SYSCALL6(n, a, b, c, d, e, f) \
    syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),(long)(f))
#define SYSCALL5(n, a, b, c, d, e)  SYSCALL6(n, a, b, c, d, e, 0)
#define SYSCALL4(n, a, b, c, d)     SYSCALL5(n, a, b, c, d, 0)
#define SYSCALL3(n, a, b, c)        SYSCALL4(n, a, b, c, 0)
#define SYSCALL2(n, a, b)           SYSCALL3(n, a, b, 0)
#define SYSCALL1(n, a)              SYSCALL2(n, a, 0)
31 +
/*
 * Perform a raw x86-64 Linux system call with up to six arguments.
 *
 * n       system call number (placed in rax)
 * a..f    arguments, placed in rdi, rsi, rdx, r10, r8 and r9
 *
 * Returns whatever the kernel leaves in rax. Callers in this file treat
 * negative values as errors.
 */
static long syscall6(long n, long a, long b, long c, long d, long e, long f) {
    long ret;
    /* r10, r8 and r9 have no dedicated constraint letters, so bind them
       through explicit register variables. "__asm__" is used instead of
       plain "asm" so this also builds in strict ISO modes (-std=c11). */
    register long r10 __asm__("r10") = d;
    register long r8  __asm__("r8")  = e;
    register long r9  __asm__("r9")  = f;
    __asm__ volatile (
        "syscall"
        : "=a"(ret)
        : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9)
        : "rcx", "r11", "memory"  /* the syscall instruction overwrites rcx and r11 */
    );
    return ret;
}
45 +
46 + static void millisleep(int ms) {
47 + long ts[] = {ms/1000, ms%1000 * 1000000L};
48 + SYSCALL2(SYS_nanosleep, ts, ts);
49 + }
50 +
51 + /* int num = 65; */ // Ignore this, used for my own personal test
52 +
53 + static long fullwrite(int fd, void *buf, long len) {
54 + for (long off = 0; off < len;) {
55 + long r = SYSCALL3(SYS_write, fd, buf+off, len-off);
56 + if (r < 0) { return r; }
57 + off += r;
58 + }
59 +
60 + return len;
61 + }
62 +
63 + __attribute((noreturn)) static void exit(int status) {
64 + SYSCALL1(SYS_exit, status);
65 + __builtin_unreachable();
66 + }
67 +
68 + __attribute((noreturn)) static void exit_group(int status) {
69 + SYSCALL1(SYS_exit_group, status);
70 + __builtin_unreachable();
71 + }
72 +
73 + static void futex_wait(int *futex, int expect) {
74 + SYSCALL4(SYS_futex, futex, FUTEX_WAIT, expect, 0);
75 + }
76 +
77 + static void futex_wake(int *futex) {
78 + SYSCALL3(SYS_futex, futex, FUTEX_WAKE, 0x7fffffff);
79 + }
80 +
/* Per-thread control block. threadentry() receives a pointer to it.
   The structure must have a 16-byte alignment on all architectures. */
typedef struct __attribute((aligned(16))) stack_head {
    /* Entry point of the thread; it is handed a pointer to its own
       stack_head. */
    void (*entry)(struct stack_head*);

    /* Everything below is free-form per-thread data. These members are
       just an example! */
    char *message;
    long message_length;
    int print_count;
    int join_futex;  // Set to 1 and woken by the thread when it is done ("join")
} stack_head;
94 +
95 + __attribute((naked)) static long newthread(CloneArgs* args) {
96 + __asm volatile (
97 + /* "args" is already in "rdi" */
98 + "mov $88, %%esi\n" // arg2 = size (always "88" until further notice)
99 + "mov $435, %%eax\n" // SYS_clone
100 + "syscall\n"
101 + "mov %%rsp, %%rdi\n" // entry point argument
102 + "ret\n"
103 + : : : "rax", "rcx", "rsi", "rdi", "r11", "memory"
104 + );
105 + }
106 +
107 + static void threadentry(stack_head *stack) {
108 + char *message = stack->message;
109 + int length = stack->message_length;
110 + int count = stack->print_count;
111 + for (int i = 0; i < count; i++) {
112 + fullwrite(1, message, length);
113 + millisleep(25);
114 + }
115 +
116 + __atomic_store_n(&stack->join_futex, 1, __ATOMIC_SEQ_CST);
117 + futex_wake(&stack->join_futex);
118 + exit(0);
119 + }
120 +
121 + static stack_head *newstack(long size) {
122 + unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0);
123 + if (p > -4096UL) { return 0; }
124 +
125 + long count = size / sizeof(stack_head);
126 + return (stack_head*)p + count - 1;
127 + }
128 +
129 + __attribute((force_align_arg_pointer)) void _start(void) {
130 + stack_head *stack = newstack(1<<16);
131 + stack->entry = threadentry;
132 +
133 + // Thread data
134 + stack->message = "hello world\n";
135 + stack->message_length = 12;
136 + stack->print_count = 20;
137 +
138 + stack->join_futex = 0; // Set the "futex"
139 +
140 + CloneArgs args = {
141 + CLONE_VM,
142 + 0, 0, 0, SIGCHLD /* I have also tried using "0" here */,
143 + (__aligned_u64)&stack, /* stack */
144 + 1 << 16, /* stack_size */
145 + 0, 0, 0, 0
146 + };
147 +
148 + newthread(&args);
149 +
150 + futex_wait(&stack->join_futex, 0);
151 + exit_group(0);
152 + }
Newer Older