// stack_head_clone3_waitid.c (3.9 KiB, C)
// libc-free x86-64 Linux multi-threading example
// $ cc -nostdlib stack_head.c
// Ref: https://nullprogram.com/blog/2023/03/23/
// This is free and unencumbered software released into the public domain.
//
#include "clone_args.h"
/* Linux x86-64 system call numbers used by this file, in numeric order. */
#define SYS_write        1    /* write(fd, buf, count) */
#define SYS_mmap         9    /* mmap(addr, len, prot, flags, fd, off) */
#define SYS_nanosleep   35    /* nanosleep(req, rem) */
#define SYS_clone       56    /* legacy clone */
#define SYS_exit        60    /* exit */
#define SYS_exit_group 231    /* exit_group */
#define SYS_waitid     247    /* waitid(idtype, id, info, options, rusage) */
/*
 * Arity helpers around syscall6().
 *
 * SYSCALL6 converts every argument to long and forwards it; each smaller
 * arity simply delegates to the next larger one with a trailing 0, so the
 * unused argument slots are always passed as zero.
 */
#define SYSCALL6(n, a, b, c, d, e, f) \
    syscall6(n, (long)(a), (long)(b), (long)(c), (long)(d), (long)(e), (long)(f))
#define SYSCALL5(n, a, b, c, d, e) SYSCALL6(n, a, b, c, d, e, 0)
#define SYSCALL4(n, a, b, c, d)    SYSCALL5(n, a, b, c, d, 0)
#define SYSCALL3(n, a, b, c)       SYSCALL4(n, a, b, c, 0)
#define SYSCALL2(n, a, b)          SYSCALL3(n, a, b, 0)
#define SYSCALL1(n, a)             SYSCALL2(n, a, 0)
/*
 * Issue a raw x86-64 Linux system call with up to six arguments.
 *
 * n        system call number (placed in rax)
 * a..f     arguments, placed in rdi, rsi, rdx, r10, r8 and r9 respectively
 *
 * Returns whatever the kernel leaves in rax: the result on success or a
 * negated errno value on failure.
 *
 * The `__asm__` spelling is used throughout (rather than plain `asm`) so the
 * function also compiles in strict ISO modes such as -std=c11, where `asm`
 * is not a keyword.
 */
static long syscall6(long n, long a, long b, long c, long d, long e, long f)
{
    long ret;
    /* r10/r8/r9 have no dedicated constraint letters, so pin them explicitly. */
    register long r10 __asm__("r10") = d;
    register long r8  __asm__("r8")  = e;
    register long r9  __asm__("r9")  = f;

    __asm__ volatile (
        "syscall"
        : "=a"(ret)
        : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9)
        : "rcx", "r11", "memory"   /* the syscall instruction overwrites rcx and r11 */
    );
    return ret;
}
/*
 * Sleep for `ms` milliseconds using the nanosleep system call.
 *
 * The same two-element buffer is handed to the kernel as both the requested
 * and the remaining time; the call is issued once and its result is ignored.
 */
static void millisleep(int ms)
{
    long ts[2];

    ts[0] = ms / 1000;                /* whole seconds */
    ts[1] = (ms % 1000) * 1000000L;   /* leftover milliseconds as nanoseconds */
    SYSCALL2(SYS_nanosleep, ts, ts);
}
/* int num = 65; */ // Ignore this, used for my own personal test
/*
 * Write all `len` bytes of `buf` to file descriptor `fd`, re-issuing the
 * write system call after every short write.
 *
 * Returns `len` once everything has been written, or the negative value
 * returned by the first failing write.
 *
 * The buffer is walked through a `const char *` because arithmetic on
 * `void *` is a GNU extension rather than standard C; the data is never
 * modified, hence the const qualifier.
 */
static long fullwrite(int fd, const void *buf, long len)
{
    const char *bytes = buf;
    long off = 0;

    while (off < len) {
        long r = SYSCALL3(SYS_write, fd, bytes + off, len - off);
        if (r < 0) {
            return r;
        }
        off += r;
    }
    return len;
}
/*
 * Invoke the SYS_exit system call with `status`. Control is not expected to
 * come back, which is what the noreturn attribute and the unreachable
 * marker tell the compiler.
 */
__attribute__((noreturn))
static void exit(int status)
{
    syscall6(SYS_exit, (long)status, 0, 0, 0, 0, 0);
    __builtin_unreachable();
}
/*
 * Invoke the SYS_exit_group system call with `status`. Control is not
 * expected to come back, which is what the noreturn attribute and the
 * unreachable marker tell the compiler.
 */
__attribute__((noreturn))
static void exit_group(int status)
{
    syscall6(SYS_exit_group, (long)status, 0, 0, 0, 0, 0);
    __builtin_unreachable();
}
long sys_waitid() {
struct siginfo s;
return SYSCALL4(SYS_waitid, 0 /* P_ALL */, 0, &s, 0x40000000 /* __WALL */);
}
/*
 * Per-thread header placed on the new thread's stack.
 *
 * The structure is 16-byte aligned. `entry` has to remain the first member:
 * it is the entry point pointer, and the entry point receives a pointer to
 * its own stack_head.
 *
 * Everything after `entry` is free-form thread-local data; the fields below
 * are only an example payload.
 */
typedef struct __attribute__((aligned(16))) stack_head {
    void (*entry)(struct stack_head *);   /* thread entry point */

    char *message;          /* text the thread prints */
    long  message_length;   /* number of bytes in message */
    int   print_count;      /* how many times to print it */
} stack_head;
/*
 * Issue system call 435 (SYS_clone3) with `args` as its first argument and
 * 88 as its second, then return.
 *
 * The function is naked, so there is no compiler-generated prologue or
 * epilogue: the `ret` below pops its target from whatever stack pointer is
 * active after the system call. In the caller that is the normal return
 * address, and rax (the system call result) is the return value.
 * NOTE(review): in the new thread the intent appears to be that the stack
 * pointer addresses a stack_head, so `ret` jumps to its `entry` member with
 * rdi holding the stack_head address — this depends on the stack/stack_size
 * values the caller put in `args`; confirm against the clone3 documentation.
 * NOTE(review): 88 is presumably sizeof(CloneArgs); that type is declared in
 * clone_args.h, which is not visible here — confirm.
 */
__attribute((naked)) static long newthread(CloneArgs* args) {
__asm volatile (
/* "args" is already in "rdi", the first system call argument register */
"mov $88, %%esi\n" // arg2 = size of the argument structure (hard-coded to 88)
"mov $435, %%eax\n" // system call number: SYS_clone3
"syscall\n"
"mov %%rsp, %%rdi\n" // entry point argument: the current stack pointer
"ret\n" // pop the jump target from the active stack
: : : "rax", "rcx", "rsi", "rdi", "r11", "memory"
);
}
/*
 * Example thread body: print the message stored in the thread's stack_head
 * `print_count` times to file descriptor 1, pausing 25 ms after each print,
 * then leave through exit(0).
 */
static void threadentry(stack_head *stack)
{
    char *text = stack->message;
    int nbytes = stack->message_length;

    for (int left = stack->print_count; left > 0; left--) {
        fullwrite(1, text, nbytes);
        millisleep(25);
    }

    exit(0);
}
static stack_head *newstack(long size) {
unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0);
if (p > -4096UL) { return 0; }
long count = size / sizeof(stack_head);
return (stack_head*)p + count - 1;
}
/*
 * Program entry point: allocate a stack, start one thread on it that prints
 * a message, wait for that thread to finish, then terminate the process.
 *
 * Exit status: 0 on success, 10 if waiting for the child failed, 11 if the
 * stack could not be allocated, 12 if the clone3 call failed.
 */
__attribute((force_align_arg_pointer)) void _start(void)
{
    const long size = 1L << 16;

    stack_head *stack = newstack(size);
    if (!stack) {
        exit_group(11);
    }
    stack->entry = threadentry;

    // Thread data
    stack->message = "hello world\n";
    stack->message_length = 12;
    stack->print_count = 20;

    /*
     * clone3 takes the LOWEST address of the stack area plus its size and
     * starts the child with its stack pointer at stack + stack_size. The
     * child must start with its stack pointer exactly on the stack_head (so
     * the `ret` in newthread pops `entry`), therefore the area handed to the
     * kernel is everything below the stack_head: it begins at the base of
     * the mapping and is `size - sizeof(stack_head)` bytes long.
     * This relies on `size` being a multiple of sizeof(stack_head), which
     * holds for 1 << 16.
     */
    const long below_head = size - (long)sizeof(*stack);

    /* Positional initializers: assumed to follow the kernel's clone_args
       field order (CloneArgs comes from clone_args.h). */
    CloneArgs args = {
        CLONE_VM,                              /* flags */
        0, 0, 0,                               /* pidfd, child_tid, parent_tid */
        SIGCHLD,                               /* exit_signal */
        (__aligned_u64)stack - below_head,     /* stack: lowest address */
        below_head,                            /* stack_size */
        0, 0, 0, 0
    };

    if (newthread(&args) < 0) {
        exit_group(12);
    }

    if (sys_waitid() != 0) {
        exit_group(10);
    }

    exit_group(0);
}
1 | // libc-free x86-64 Linux multi-threading example |
2 | // $ cc -nostdlib stack_head.c |
3 | // Ref: https://nullprogram.com/blog/2023/03/23/ |
4 | // This is free and unencumbered software released into the public domain. |
5 | // |
6 | #include "clone_args.h" |
7 | |
8 | #define SYS_write 1 |
9 | #define SYS_mmap 9 |
10 | #define SYS_nanosleep 35 |
11 | #define SYS_clone 56 |
12 | #define SYS_exit 60 |
13 | #define SYS_waitid 247 |
14 | #define SYS_exit_group 231 |
15 | |
16 | #define SYSCALL1(n, a) \ |
17 | syscall6(n,(long)(a),0,0,0,0,0) |
18 | #define SYSCALL2(n, a, b) \ |
19 | syscall6(n,(long)(a),(long)(b),0,0,0,0) |
20 | #define SYSCALL3(n, a, b, c) \ |
21 | syscall6(n,(long)(a),(long)(b),(long)(c),0,0,0) |
22 | #define SYSCALL4(n, a, b, c, d) \ |
23 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),0,0) |
24 | #define SYSCALL5(n, a, b, c, d, e) \ |
25 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),0) |
26 | #define SYSCALL6(n, a, b, c, d, e, f) \ |
27 | syscall6(n,(long)(a),(long)(b),(long)(c),(long)(d),(long)(e),(long)(f)) |
28 | |
29 | static long syscall6(long n, long a, long b, long c, long d, long e, long f) { |
30 | register long ret; |
31 | register long r10 asm("r10") = d; |
32 | register long r8 asm("r8") = e; |
33 | register long r9 asm("r9") = f; |
34 | __asm volatile ( |
35 | "syscall" |
36 | : "=a"(ret) |
37 | : "a"(n), "D"(a), "S"(b), "d"(c), "r"(r10), "r"(r8), "r"(r9) |
38 | : "rcx", "r11", "memory" |
39 | ); |
40 | return ret; |
41 | } |
42 | |
43 | static void millisleep(int ms) { |
44 | long ts[] = {ms/1000, ms%1000 * 1000000L}; |
45 | SYSCALL2(SYS_nanosleep, ts, ts); |
46 | } |
47 | |
48 | /* int num = 65; */ // Ignore this, used for my own personal test |
49 | |
50 | static long fullwrite(int fd, void *buf, long len) { |
51 | for (long off = 0; off < len;) { |
52 | long r = SYSCALL3(SYS_write, fd, buf+off, len-off); |
53 | if (r < 0) { return r; } |
54 | off += r; |
55 | } |
56 | |
57 | return len; |
58 | } |
59 | |
60 | __attribute((noreturn)) static void exit(int status) { |
61 | SYSCALL1(SYS_exit, status); |
62 | __builtin_unreachable(); |
63 | } |
64 | |
65 | __attribute((noreturn)) static void exit_group(int status) { |
66 | SYSCALL1(SYS_exit_group, status); |
67 | __builtin_unreachable(); |
68 | } |
69 | |
70 | long sys_waitid() { |
71 | struct siginfo s; |
72 | return SYSCALL4(SYS_waitid, 0 /* P_ALL */, 0, &s, 0x40000000 /* __WALL */); |
73 | } |
74 | |
75 | /* The structure must have a 16-byte alignment on all architectures */ |
76 | struct __attribute((aligned(16))) stack_head { |
77 | void (*entry)(struct stack_head*); // The entry point pointer. Will receive a pointer to its own stack_head |
78 | |
79 | /* The rest of the arguments can be filled with any thread-local |
80 | data we want. The following is just an example! */ |
81 | char *message; |
82 | long message_length; |
83 | int print_count; |
84 | }; |
85 | |
86 | typedef struct stack_head stack_head; |
87 | |
88 | __attribute((naked)) static long newthread(CloneArgs* args) { |
89 | __asm volatile ( |
90 | /* "args" is already in "rdi" */ |
91 | "mov $88, %%esi\n" // arg2 = size (always "88" until further notice) |
92 | "mov $435, %%eax\n" // SYS_clone3 |
93 | "syscall\n" |
94 | "mov %%rsp, %%rdi\n" // entry point argument |
95 | "ret\n" |
96 | : : : "rax", "rcx", "rsi", "rdi", "r11", "memory" |
97 | ); |
98 | } |
99 | |
100 | static void threadentry(stack_head *stack) { |
101 | char *message = stack->message; |
102 | int length = stack->message_length; |
103 | int count = stack->print_count; |
104 | for (int i = 0; i < count; i++) { |
105 | fullwrite(1, message, length); |
106 | millisleep(25); |
107 | } |
108 | |
109 | exit(0); |
110 | } |
111 | |
112 | static stack_head *newstack(long size) { |
113 | unsigned long p = SYSCALL6(SYS_mmap, 0, size, 3, 0x22, -1, 0); |
114 | if (p > -4096UL) { return 0; } |
115 | |
116 | long count = size / sizeof(stack_head); |
117 | return (stack_head*)p + count - 1; |
118 | } |
119 | |
120 | __attribute((force_align_arg_pointer)) void _start(void) { |
121 | stack_head* stack = newstack(1<<16); |
122 | stack->entry = threadentry; |
123 | |
124 | // Thread data |
125 | stack->message = "hello world\n"; |
126 | stack->message_length = 12; |
127 | stack->print_count = 20; |
128 | |
129 | CloneArgs args = { |
130 | CLONE_VM, |
131 | 0, 0, 0, SIGCHLD /* I have also tried using "0" here */, |
132 | (__aligned_u64)stack, /* stack */ |
133 | 1 << 16, /* stack_size */ |
134 | 0, 0, 0, 0 |
135 | }; |
136 | |
137 | newthread(&args); |
138 | |
139 | /* Fails */ |
140 | if (sys_waitid() != 0) { exit_group(10); }; |
141 | |
142 | // Try to sleep and it indeed sleeps but the thread doesn't write the message |
143 | /* millisleep(2000); */ |
144 | |
145 | exit_group(0); |
146 | } |