stack_head_clone3_futex.c
· 4.1 KiB · C
Raw
// libc-free x86-64 Linux multi-threading example
// $ cc -nostdlib stack_head.c
// Ref: https://nullprogram.com/blog/2023/03/23/
// This is free and unencumbered software released into the public domain.
#include "clone_args.h"
// System call numbers passed as the first argument of syscall6().
// These are the x86-64 Linux values; other architectures number them differently.
#define SYS_write 1
#define SYS_mmap 9
#define SYS_nanosleep 35
// NOTE(review): SYS_clone is not referenced anywhere in the visible code;
// newthread() loads the literal 435 into eax instead.
#define SYS_clone 56
#define SYS_exit 60
#define SYS_futex 202
#define SYS_exit_group 231
// Operation codes passed as the second argument of the futex system call
// by futex_wait() and futex_wake().
#define FUTEX_WAIT 0
#define FUTEX_WAKE 1
/*
 * Fixed-arity front ends for syscall6().
 *
 * SYSCALLk(n, ...) forwards the system call number n unchanged, converts
 * each of its k arguments to long with a cast (so pointers and ints can be
 * passed directly), and fills the remaining argument slots with 0.
 */
#define SYSCALL1(n, a)                syscall6(n, (long)(a), 0, 0, 0, 0, 0)
#define SYSCALL2(n, a, b)             syscall6(n, (long)(a), (long)(b), 0, 0, 0, 0)
#define SYSCALL3(n, a, b, c)          syscall6(n, (long)(a), (long)(b), (long)(c), 0, 0, 0)
#define SYSCALL4(n, a, b, c, d)       syscall6(n, (long)(a), (long)(b), (long)(c), (long)(d), 0, 0)
#define SYSCALL5(n, a, b, c, d, e)    syscall6(n, (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), 0)
#define SYSCALL6(n, a, b, c, d, e, f) syscall6(n, (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f))
/*
 * Issue a raw x86-64 Linux system call with up to six arguments.
 *
 * n        system call number (placed in rax)
 * a..f     arguments, placed in rdi, rsi, rdx, r10, r8 and r9 respectively
 *
 * Returns whatever the kernel leaves in rax. On failure that is a negative
 * errno value (there is no libc here, so no global errno is set).
 *
 * The alternate keyword __asm__ is used throughout (rather than plain
 * `asm`) so the function also compiles in strict ISO modes such as
 * -std=c11, where `asm` is not a keyword.
 */
static long syscall6(long n, long a, long b, long c, long d, long e, long f) {
    long ret;
    /* r10/r8/r9 have no single-letter constraint, so pin them explicitly. */
    register long r10 __asm__("r10") = d;
    register long r8  __asm__("r8")  = e;
    register long r9  __asm__("r9")  = f;
    __asm__ volatile (
        "syscall"
        : "=a"(ret)
        : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9)
        /* the syscall instruction overwrites rcx and r11; the kernel may
           read or write arbitrary memory on our behalf */
        : "rcx", "r11", "memory"
    );
    return ret;
}
/*
 * Sleep for roughly `ms` milliseconds using the nanosleep system call.
 *
 * The request is built as two longs, {seconds, nanoseconds}. The same
 * buffer is passed as both the request and the "remaining time" output,
 * and the syscall result is ignored, so an interrupted sleep is not resumed.
 */
static void millisleep(int ms) {
    long seconds = ms / 1000;
    long nanoseconds = ms % 1000 * 1000000L;
    long ts[2] = {seconds, nanoseconds};
    SYSCALL2(SYS_nanosleep, ts, ts);
}
/* int num = 65; */ // Ignore this, used for my own personal test
/*
 * Write all `len` bytes of `buf` to descriptor `fd`, re-issuing the write
 * system call after short writes until everything has been written.
 *
 * Returns `len` on success, or the negative value returned by the first
 * failing write.
 */
static long fullwrite(int fd, void *buf, long len) {
    char *bytes = buf;
    long done = 0;

    while (done < len) {
        long n = SYSCALL3(SYS_write, fd, bytes + done, len - done);
        if (n < 0) {
            return n;
        }
        done += n;
    }
    return len;
}
/*
 * Terminate the calling thread only, via the exit system call, with the
 * given status. Does not return.
 */
__attribute__((noreturn)) static void exit(int status) {
    (void)SYSCALL1(SYS_exit, status);
    /* Tells the compiler control never gets here, so no return path is emitted. */
    __builtin_unreachable();
}
/*
 * Terminate the calling thread group via the exit_group system call with
 * the given status. Does not return.
 */
__attribute__((noreturn)) static void exit_group(int status) {
    (void)SYSCALL1(SYS_exit_group, status);
    /* Tells the compiler control never gets here, so no return path is emitted. */
    __builtin_unreachable();
}
/*
 * Block on `*futex` with FUTEX_WAIT for as long as it still holds `expect`.
 *
 * The fourth argument (the timeout pointer) is 0, i.e. no timeout. The
 * syscall result is discarded, so callers must re-check the futex word
 * themselves if they need to distinguish a real wake from an early return.
 */
static void futex_wait(int *futex, int expect) {
    const long no_timeout = 0;
    (void)SYSCALL4(SYS_futex, futex, FUTEX_WAIT, expect, no_timeout);
}
/*
 * Wake waiters blocked on `*futex` with FUTEX_WAKE.
 *
 * The wake count is 0x7fffffff (the largest positive int), which in
 * practice means "everyone". The syscall result is discarded.
 */
static void futex_wake(int *futex) {
    const int wake_all = 0x7fffffff;
    (void)SYSCALL3(SYS_futex, futex, FUTEX_WAKE, wake_all);
}
/*
 * Per-thread control block. newstack() places one at the very top of a
 * freshly mapped thread stack.
 *
 * The 16-byte alignment is required on all architectures.
 *
 * `entry` must stay the first member: newthread() finishes with `ret`,
 * which is meant to pop this pointer off the new stack and jump to it.
 * Everything after `entry` is free-form per-thread data; the members
 * below are only an example payload.
 */
typedef struct stack_head stack_head;

struct __attribute__((aligned(16))) stack_head {
    /* Thread entry point; it is handed a pointer to its own stack_head. */
    void (*entry)(stack_head *);

    /* Example payload: text to print, its length in bytes, and how many times. */
    char *message;
    long message_length;
    int print_count;

    /* Futex word used to "join": 0 while the thread runs, set to 1 when it is done. */
    int join_futex;
};
/*
 * Create a new task by issuing system call number 435 (clone3 on x86-64
 * Linux) directly, with `args` as its first argument and 88 as its second.
 *
 * The function is naked, so the compiler emits no prologue or epilogue and
 * the asm below is the entire function body.
 *
 * Both the caller and the newly created task continue after the `syscall`
 * instruction and run the same two instructions: copy rsp into rdi, then
 * `ret`.
 *   - In the caller, `ret` returns normally with the syscall result in rax.
 *   - In the new task, rsp is whatever stack the kernel set up from `args`,
 *     so `ret` pops the word stored there and jumps to it, with rdi still
 *     holding that stack address. Presumably that word is a stack_head's
 *     `entry` pointer -- this depends on how the caller fills in `args`.
 */
__attribute((naked)) static long newthread(CloneArgs* args) {
__asm volatile (
/* "args" is already in "rdi", which is also the syscall's first argument register */
"mov $88, %%esi\n" // arg2 = size of *args in bytes; assumes sizeof(CloneArgs) == 88 -- confirm against clone_args.h
"mov $435, %%eax\n" // syscall number 435 is clone3 (NOT SYS_clone, which is 56)
"syscall\n"
"mov %%rsp, %%rdi\n" // entry point argument: the current stack pointer
"ret\n" // pop the word at the stack pointer and jump to it
: : : "rax", "rcx", "rsi", "rdi", "r11", "memory"
);
}
/*
 * Thread body: print the configured message `print_count` times, pausing
 * 25 ms after each print, then signal completion and exit this thread.
 *
 * stack    the thread's own stack_head, carrying its parameters and the
 *          join futex
 *
 * Does not return: it ends with exit(), which terminates only the calling
 * thread.
 */
static void threadentry(stack_head *stack) {
    char *message = stack->message;
    /* Keep the length as long: the field is long and fullwrite() takes a
       long, so going through an int would silently truncate large values. */
    long length = stack->message_length;
    int count = stack->print_count;

    for (int i = 0; i < count; i++) {
        fullwrite(1, message, length);
        millisleep(25);
    }

    /* Publish "done" before waking, so a joiner that wakes up (or has not
       started waiting yet) observes the new value. */
    __atomic_store_n(&stack->join_futex, 1, __ATOMIC_SEQ_CST);
    futex_wake(&stack->join_futex);
    exit(0);
}
static stack_head *newstack(long size) {
unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0);
if (p > -4096UL) { return 0; }
long count = size / sizeof(stack_head);
return (stack_head*)p + count - 1;
}
__attribute((force_align_arg_pointer)) void _start(void) {
stack_head *stack = newstack(1<<16);
stack->entry = threadentry;
// Thread data
stack->message = "hello world\n";
stack->message_length = 12;
stack->print_count = 20;
stack->join_futex = 0; // Set the "futex"
CloneArgs args = {
CLONE_VM,
0, 0, 0, SIGCHLD /* I have also tried using "0" here */,
(__aligned_u64)&stack, /* stack */
1 << 16, /* stack_size */
0, 0, 0, 0
};
newthread(&args);
futex_wait(&stack->join_futex, 0);
exit_group(0);
}
1 | // libc-free x86-64 Linux multi-threading example |
2 | // $ cc -nostdlib stack_head.c |
3 | // Ref: https://nullprogram.com/blog/2023/03/23/ |
4 | // This is free and unencumbered software released into the public domain. |
5 | |
6 | #include "clone_args.h" |
7 | |
8 | #define SYS_write 1 |
9 | #define SYS_mmap 9 |
10 | #define SYS_nanosleep 35 |
11 | #define SYS_clone 56 |
12 | #define SYS_exit 60 |
13 | #define SYS_futex 202 |
14 | #define SYS_exit_group 231 |
15 | |
16 | #define FUTEX_WAIT 0 |
17 | #define FUTEX_WAKE 1 |
18 | |
19 | #define SYSCALL1(n, a) \ |
20 | syscall6(n,(long)(a),0,0,0,0,0) |
21 | #define SYSCALL2(n, a, b) \ |
22 | syscall6(n,(long)(a),(long)(b),0,0,0,0) |
23 | #define SYSCALL3(n, a, b, c) \ |
24 | syscall6(n,(long)(a),(long)(b),(long)(c),0,0,0) |
25 | #define SYSCALL4(n, a, b, c, d) \ |
26 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),0,0) |
27 | #define SYSCALL5(n, a, b, c, d, e) \ |
28 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),0) |
29 | #define SYSCALL6(n, a, b, c, d, e, f) \ |
30 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),(long)(f)) |
31 | |
32 | static long syscall6(long n, long a, long b, long c, long d, long e, long f) { |
33 | register long ret; |
34 | register long r10 asm("r10") = d; |
35 | register long r8 asm("r8") = e; |
36 | register long r9 asm("r9") = f; |
37 | __asm volatile ( |
38 | "syscall" |
39 | : "=a"(ret) |
40 | : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9) |
41 | : "rcx", "r11", "memory" |
42 | ); |
43 | return ret; |
44 | } |
45 | |
46 | static void millisleep(int ms) { |
47 | long ts[] = {ms/1000, ms%1000 * 1000000L}; |
48 | SYSCALL2(SYS_nanosleep, ts, ts); |
49 | } |
50 | |
51 | /* int num = 65; */ // Ignore this, used for my own personal test |
52 | |
53 | static long fullwrite(int fd, void *buf, long len) { |
54 | for (long off = 0; off < len;) { |
55 | long r = SYSCALL3(SYS_write, fd, buf+off, len-off); |
56 | if (r < 0) { return r; } |
57 | off += r; |
58 | } |
59 | |
60 | return len; |
61 | } |
62 | |
63 | __attribute((noreturn)) static void exit(int status) { |
64 | SYSCALL1(SYS_exit, status); |
65 | __builtin_unreachable(); |
66 | } |
67 | |
68 | __attribute((noreturn)) static void exit_group(int status) { |
69 | SYSCALL1(SYS_exit_group, status); |
70 | __builtin_unreachable(); |
71 | } |
72 | |
73 | static void futex_wait(int *futex, int expect) { |
74 | SYSCALL4(SYS_futex, futex, FUTEX_WAIT, expect, 0); |
75 | } |
76 | |
77 | static void futex_wake(int *futex) { |
78 | SYSCALL3(SYS_futex, futex, FUTEX_WAKE, 0x7fffffff); |
79 | } |
80 | |
81 | /* The structure must have a 16-byte alignment on all architectures */ |
82 | struct __attribute((aligned(16))) stack_head { |
83 | void (*entry)(struct stack_head*); // The entry point pointer. Will receive a pointer to its own stack_head |
84 | |
85 | /* The rest of the arguments can be filled with any thread-local |
86 | data we want. The following is just an example! */ |
87 | char *message; |
88 | long message_length; |
89 | int print_count; |
90 | int join_futex; // Join "futex" to help us "wait" to "join" that thread later |
91 | }; |
92 | |
93 | typedef struct stack_head stack_head; |
94 | |
95 | __attribute((naked)) static long newthread(CloneArgs* args) { |
96 | __asm volatile ( |
97 | /* "args" is already in "rdi" */ |
98 | "mov $88, %%esi\n" // arg2 = size (always "88" until further notice) |
99 | "mov $435, %%eax\n" // SYS_clone |
100 | "syscall\n" |
101 | "mov %%rsp, %%rdi\n" // entry point argument |
102 | "ret\n" |
103 | : : : "rax", "rcx", "rsi", "rdi", "r11", "memory" |
104 | ); |
105 | } |
106 | |
107 | static void threadentry(stack_head *stack) { |
108 | char *message = stack->message; |
109 | int length = stack->message_length; |
110 | int count = stack->print_count; |
111 | for (int i = 0; i < count; i++) { |
112 | fullwrite(1, message, length); |
113 | millisleep(25); |
114 | } |
115 | |
116 | __atomic_store_n(&stack->join_futex, 1, __ATOMIC_SEQ_CST); |
117 | futex_wake(&stack->join_futex); |
118 | exit(0); |
119 | } |
120 | |
121 | static stack_head *newstack(long size) { |
122 | unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0); |
123 | if (p > -4096UL) { return 0; } |
124 | |
125 | long count = size / sizeof(stack_head); |
126 | return (stack_head*)p + count - 1; |
127 | } |
128 | |
129 | __attribute((force_align_arg_pointer)) void _start(void) { |
130 | stack_head *stack = newstack(1<<16); |
131 | stack->entry = threadentry; |
132 | |
133 | // Thread data |
134 | stack->message = "hello world\n"; |
135 | stack->message_length = 12; |
136 | stack->print_count = 20; |
137 | |
138 | stack->join_futex = 0; // Set the "futex" |
139 | |
140 | CloneArgs args = { |
141 | CLONE_VM, |
142 | 0, 0, 0, SIGCHLD /* I have also tried using "0" here */, |
143 | (__aligned_u64)&stack, /* stack */ |
144 | 1 << 16, /* stack_size */ |
145 | 0, 0, 0, 0 |
146 | }; |
147 | |
148 | newthread(&args); |
149 | |
150 | futex_wait(&stack->join_futex, 0); |
151 | exit_group(0); |
152 | } |