@@ -25,51 +25,56 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
2525
2626#ifdef _WIN32
2727/* ABI: Win64 */
28+ /* On Windows the handle is allocated by malloc, which is guaranteed to
29+ return memory with at least 16-byte alignment. Hence we don't need to
30+ align it in order to use movaps. */
2831static unsigned char co_swap_function [] = { /* hand-assembled Win64 code: saves register state into the buffer at rdx, then loads register state from the buffer at rcx; presumably invoked through the co_swap function pointer - confirm against the setup code */
29- 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp */
30- 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] */
31- 0x58 , /* pop rax */
32- 0x48 , 0x89 , 0x6a , 0x08 , /* mov [rdx+0x8],rbp */
33- 0x48 , 0x89 , 0x72 , 0x10 , /* mov [rdx+0x10],rsi */
34- 0x48 , 0x89 , 0x7a , 0x18 , /* mov [rdx+0x18],rdi */
35- 0x48 , 0x89 , 0x5a , 0x20 , /* mov [rdx+0x20],rbx */
36- 0x4c , 0x89 , 0x62 , 0x28 , /* mov [rdx+0x28],r12 */
37- 0x4c , 0x89 , 0x6a , 0x30 , /* mov [rdx+0x30],r13 */
38- 0x4c , 0x89 , 0x72 , 0x38 , /* mov [rdx+0x38],r14 */
39- 0x4c , 0x89 , 0x7a , 0x40 , /* mov [rdx+0x40],r15 */
40- 0x48 , 0x81 , 0xc2 , 0x80 , 0x00 , 0x00 , 0x00 , /* add rdx,0x80 */
41- 0x48 , 0x83 , 0xe2 , 0xf0 , /* and rdx,-0x10 */
42- 0x0f , 0x29 , 0x32 , /* movaps [rdx],xmm6 */
43- 0x0f , 0x29 , 0x7a , 0x10 , /* movaps [rdx+0x10],xmm7 */
44- 0x44 , 0x0f , 0x29 , 0x42 , 0x20 , /* movaps [rdx+0x20],xmm8 */
45- 0x44 , 0x0f , 0x29 , 0x4a , 0x30 , /* movaps [rdx+0x30],xmm9 */
46- 0x44 , 0x0f , 0x29 , 0x52 , 0x40 , /* movaps [rdx+0x40],xmm10 */
47- 0x44 , 0x0f , 0x29 , 0x5a , 0x50 , /* movaps [rdx+0x50],xmm11 */
48- 0x44 , 0x0f , 0x29 , 0x62 , 0x60 , /* movaps [rdx+0x60],xmm12 */
49- 0x44 , 0x0f , 0x29 , 0x6a , 0x70 , /* movaps [rdx+0x70],xmm13 */
50- 0x44 , 0x0f , 0x29 , 0xb2 , 0x80 , 0x00 , 0x00 , 0x00 , /* movaps [rdx+0x80],xmm14 */
51- 0x44 , 0x0f , 0x29 , 0xba , 0x90 , 0x00 , 0x00 , 0x00 , /* movaps [rdx+0x90],xmm15 */
52- 0x48 , 0x8b , 0x69 , 0x08 , /* mov rbp,[rcx+0x8] */
53- 0x48 , 0x8b , 0x71 , 0x10 , /* mov rsi,[rcx+0x10] */
54- 0x48 , 0x8b , 0x79 , 0x18 , /* mov rdi,[rcx+0x18] */
55- 0x48 , 0x8b , 0x59 , 0x20 , /* mov rbx,[rcx+0x20] */
56- 0x4c , 0x8b , 0x61 , 0x28 , /* mov r12,[rcx+0x28] */
57- 0x4c , 0x8b , 0x69 , 0x30 , /* mov r13,[rcx+0x30] */
58- 0x4c , 0x8b , 0x71 , 0x38 , /* mov r14,[rcx+0x38] */
59- 0x4c , 0x8b , 0x79 , 0x40 , /* mov r15,[rcx+0x40] */
60- 0x48 , 0x81 , 0xc1 , 0x80 , 0x00 , 0x00 , 0x00 , /* add rcx,0x80 */
61- 0x48 , 0x83 , 0xe1 , 0xf0 , /* and rcx,-0x10 */
62- 0x0f , 0x29 , 0x31 , /* movaps [rcx],xmm6 */
63- 0x0f , 0x29 , 0x79 , 0x10 , /* movaps [rcx+0x10],xmm7 */
64- 0x44 , 0x0f , 0x29 , 0x41 , 0x20 , /* movaps [rcx+0x20],xmm8 */
65- 0x44 , 0x0f , 0x29 , 0x49 , 0x30 , /* movaps [rcx+0x30],xmm9 */
66- 0x44 , 0x0f , 0x29 , 0x51 , 0x40 , /* movaps [rcx+0x40],xmm10 */
67- 0x44 , 0x0f , 0x29 , 0x59 , 0x50 , /* movaps [rcx+0x50],xmm11 */
68- 0x44 , 0x0f , 0x29 , 0x61 , 0x60 , /* movaps [rcx+0x60],xmm12 */
69- 0x44 , 0x0f , 0x29 , 0x69 , 0x70 , /* movaps [rcx+0x70],xmm13 */
70- 0x44 , 0x0f , 0x29 , 0xb1 , 0x80 , 0x00 , 0x00 , 0x00 , /* movaps [rcx+0x80],xmm14 */
71- 0x44 , 0x0f , 0x29 , 0xb9 , 0x90 , 0x00 , 0x00 , 0x00 , /* movaps [rcx+0x90],xmm15 */
72- 0xff , 0xe0 , /* jmp rax */
32+ 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp ; save the current stack pointer at offset 0 of the buffer at rdx */
33+ 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] ; switch to the stack pointer stored at offset 0 of the buffer at rcx */
34+ 0x58 , /* pop rax ; value popped from the new stack is the jump target used by the final jmp */
35+ 0x48 , 0x89 , 0x6a , 0x08 , /* mov [rdx+ 8],rbp */
36+ 0x48 , 0x89 , 0x72 , 0x10 , /* mov [rdx+16],rsi */
37+ 0x48 , 0x89 , 0x7a , 0x18 , /* mov [rdx+24],rdi */
38+ 0x48 , 0x89 , 0x5a , 0x20 , /* mov [rdx+32],rbx */
39+ 0x4c , 0x89 , 0x62 , 0x28 , /* mov [rdx+40],r12 */
40+ 0x4c , 0x89 , 0x6a , 0x30 , /* mov [rdx+48],r13 */
41+ 0x4c , 0x89 , 0x72 , 0x38 , /* mov [rdx+56],r14 */
42+ 0x4c , 0x89 , 0x7a , 0x40 , /* mov [rdx+64],r15 */
43+ #if !defined (LIBCO_NO_SSE )
44+ 0x0f , 0x29 , 0x72 , 0x50 , /* movaps [rdx+ 80],xmm6 */
45+ 0x0f , 0x29 , 0x7a , 0x60 , /* movaps [rdx+ 96],xmm7 */
46+ 0x44 , 0x0f , 0x29 , 0x42 , 0x70 , /* movaps [rdx+112],xmm8 */
47+ 0x48 , 0x83 , 0xc2 , 0x70 , /* add rdx,112 ; displacements on the stores below are relative to the advanced rdx (xmm9 lands at original offset 128) */
48+ 0x44 , 0x0f , 0x29 , 0x4a , 0x10 , /* movaps [rdx+ 16],xmm9 */
49+ 0x44 , 0x0f , 0x29 , 0x52 , 0x20 , /* movaps [rdx+ 32],xmm10 */
50+ 0x44 , 0x0f , 0x29 , 0x5a , 0x30 , /* movaps [rdx+ 48],xmm11 */
51+ 0x44 , 0x0f , 0x29 , 0x62 , 0x40 , /* movaps [rdx+ 64],xmm12 */
52+ 0x44 , 0x0f , 0x29 , 0x6a , 0x50 , /* movaps [rdx+ 80],xmm13 */
53+ 0x44 , 0x0f , 0x29 , 0x72 , 0x60 , /* movaps [rdx+ 96],xmm14 */
54+ 0x44 , 0x0f , 0x29 , 0x7a , 0x70 , /* movaps [rdx+112],xmm15 */
55+ #endif
56+ 0x48 , 0x8b , 0x69 , 0x08 , /* mov rbp,[rcx+ 8] */
57+ 0x48 , 0x8b , 0x71 , 0x10 , /* mov rsi,[rcx+16] */
58+ 0x48 , 0x8b , 0x79 , 0x18 , /* mov rdi,[rcx+24] */
59+ 0x48 , 0x8b , 0x59 , 0x20 , /* mov rbx,[rcx+32] */
60+ 0x4c , 0x8b , 0x61 , 0x28 , /* mov r12,[rcx+40] */
61+ 0x4c , 0x8b , 0x69 , 0x30 , /* mov r13,[rcx+48] */
62+ 0x4c , 0x8b , 0x71 , 0x38 , /* mov r14,[rcx+56] */
63+ 0x4c , 0x8b , 0x79 , 0x40 , /* mov r15,[rcx+64] */
64+ #if !defined (LIBCO_NO_SSE )
65+ 0x0f , 0x28 , 0x71 , 0x50 , /* movaps xmm6, [rcx+ 80] ; opcode 0x0f 0x28 is the load form (memory -> register), unlike the 0x29 stores above */
66+ 0x0f , 0x28 , 0x79 , 0x60 , /* movaps xmm7, [rcx+ 96] */
67+ 0x44 , 0x0f , 0x28 , 0x41 , 0x70 , /* movaps xmm8, [rcx+112] */
68+ 0x48 , 0x83 , 0xc1 , 0x70 , /* add rcx,112 ; displacements on the loads below are relative to the advanced rcx (xmm9 is read from original offset 128) */
69+ 0x44 , 0x0f , 0x28 , 0x49 , 0x10 , /* movaps xmm9, [rcx+ 16] */
70+ 0x44 , 0x0f , 0x28 , 0x51 , 0x20 , /* movaps xmm10,[rcx+ 32] */
71+ 0x44 , 0x0f , 0x28 , 0x59 , 0x30 , /* movaps xmm11,[rcx+ 48] */
72+ 0x44 , 0x0f , 0x28 , 0x61 , 0x40 , /* movaps xmm12,[rcx+ 64] */
73+ 0x44 , 0x0f , 0x28 , 0x69 , 0x50 , /* movaps xmm13,[rcx+ 80] */
74+ 0x44 , 0x0f , 0x28 , 0x71 , 0x60 , /* movaps xmm14,[rcx+ 96] */
75+ 0x44 , 0x0f , 0x28 , 0x79 , 0x70 , /* movaps xmm15,[rcx+112] */
76+ #endif
77+ 0xff , 0xe0 , /* jmp rax ; continue at the address popped from the new stack above */
7378};
7479
7580#include <windows.h>
0 commit comments