Download raw body.
arm64 -fret-clean attempt
"Theo de Raadt" <deraadt@openbsd.org> writes: > I've been trying to write -fret-clean for arm64. > > On a return-stack architecture like amd64, the callee has to clean up the > word on the stack upon return. > > arm64, like some other risc architectures, is a link-register architecture. > In this case, the return address is saved in some temporary location by > the caller, who loads it into the link register before returning. Before > that moment, the caller has to clean it up. > > After running around in the swamp for a while, I came up with this variation > that inserts the correct instruction at the correct place. It works, as long > as the optimizer is turned off with -O0. I joined the swamp race and found some cases where I don't understand the transformation happening when compiling with ret-clean. As it happens, a crash can be reproduced by compiling tmux. It segv's when options.c is compiled with the new flag, so I started staring at the assembly output for these two cases: % ~/s/llvm-project/build/bin/clang -mllvm -aarch64-ret-clean -O2 -S /usr/src/usr.bin/tmux/options.c -o bad-direct.s % ~/s/llvm-project/build/bin/clang -O2 -S /usr/src/usr.bin/tmux/options.c -o good-direct.s In case somebody wants to stare at the differences without waiting for clang to build, behold --- good-direct.s Sat Jul 13 00:30:39 2024 +++ bad-direct.s Sat Jul 13 00:29:50 2024 @@ -40,7 +40,8 @@ stp xzr, x19, [x0] .cfi_def_cfa wsp, 32 ldp x19, x15, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -136,7 +137,8 @@ mov x0, x19 .cfi_def_cfa wsp, 32 ldp x20, x19, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #32]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -607,8 +609,9 @@ mov x0, x19 .cfi_def_cfa wsp, 48 ldp x20, x19, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x21, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -857,7 +860,8 @@ .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload ldp x21, x20, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -922,8 +926,9 @@ .LBB8_4: // %while.end .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -976,7 +981,8 @@ .LBB9_2: // %if.end .cfi_def_cfa wsp, 32 ldp x19, x15, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -1267,8 +1273,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -1370,7 +1377,8 @@ .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload ldp x21, x20, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -1554,8 +1562,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -1589,8 +1598,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -1841,6 +1851,7 @@ ldp x21, x20, [sp, #48] // 16-byte Folded Reload ldp x23, x22, [sp, #32] // 16-byte Folded Reload ldp x29, x30, [sp, #16] // 16-byte Folded Reload + str xzr, [sp, #16] // 8-byte Folded Spill add sp, sp, #80 .cfi_def_cfa_offset 0 hint #29 @@ -1979,8 +1990,9 @@ bl xasprintf ldr x0, [x29, #24] .cfi_def_cfa wsp, 32 + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x15, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -2034,8 +2046,9 @@ ldr x0, [x9, x8, lsl #3] .cfi_def_cfa wsp, 32 .LBB14_8: // %sw.bb + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x15, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + str xzr, [sp, #32]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -2175,8 +2188,9 @@ .LBB18_5: // %cleanup .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -2240,8 +2254,9 @@ mov x0, x19 .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -2644,7 +2659,8 @@ mov x0, x19 .cfi_def_cfa wsp, 32 ldp x20, x19, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -2969,8 +2985,9 @@ .LBB21_44: // %options_array_RB_INSERT.exit .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3036,8 +3053,9 @@ // %bb.5: // %if.then10 .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3051,8 +3069,9 @@ .cfi_restore_state .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3354,8 +3373,9 @@ ldr x0, [x1] mov w1, wzr .cfi_def_cfa wsp, 32 + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x15, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3398,8 +3418,9 @@ ldr x0, [x1] .cfi_def_cfa wsp, 32 .LBB30_10: // %if.end54 + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x15, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3451,8 +3472,9 @@ .LBB30_19: // %sw.epilog ldr x0, [x29, #24] .cfi_def_cfa wsp, 32 + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x15, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3563,7 +3585,8 @@ .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload ldp x21, x20, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #48]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3652,8 +3675,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3797,8 +3821,9 @@ ldp x21, x20, [sp, #64] // 16-byte Folded Reload ldp x23, x22, [sp, #48] // 16-byte Folded Reload ldp x25, x24, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x26, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #96 // 16-byte Folded Reload + str xzr, [sp, #96]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3839,8 +3864,9 @@ ldp x21, x20, [sp, #64] // 16-byte Folded Reload ldp x23, x22, [sp, #48] // 16-byte Folded Reload ldp x25, x24, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x26, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #96 // 16-byte Folded Reload + str xzr, [sp, #96]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -3930,8 +3956,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4002,8 +4029,9 @@ ldr x0, [x0, #24] .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4085,8 +4113,9 @@ ldr x0, [x0, #24] .cfi_def_cfa wsp, 48 ldp x19, x15, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x20, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #48 // 16-byte Folded Reload + str xzr, [sp, #48]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4240,6 +4269,7 @@ ldp x21, x20, [sp, #272] // 16-byte Folded Reload ldp x28, x22, [sp, #256] // 16-byte Folded Reload ldp x29, x30, [sp, #240] // 16-byte Folded Reload + str xzr, [sp, #240] // 8-byte Folded Spill add sp, sp, #304 .cfi_def_cfa_offset 0 hint #29 @@ -4374,8 +4404,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4475,8 +4506,9 @@ ldp x23, x22, [sp, #64] // 16-byte Folded Reload ldp x25, x24, [sp, #48] // 16-byte Folded Reload ldp x27, x26, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x28, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #112 // 16-byte Folded Reload + str xzr, [sp, #112]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4589,8 +4621,9 @@ ldp x23, x22, [sp, #64] // 16-byte Folded Reload ldp x25, x24, [sp, #48] // 16-byte Folded Reload ldp x27, x26, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x28, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #112 // 16-byte Folded Reload + str xzr, [sp, #112]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4748,8 +4781,9 @@ ldp x21, x20, [sp, #64] // 16-byte Folded Reload ldp x23, x22, [sp, #48] // 16-byte Folded Reload ldp x25, x24, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x26, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #96 // 16-byte Folded Reload + str xzr, [sp, #96]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -4981,8 +5015,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -5067,7 +5102,8 @@ ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload ldp x23, x22, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #64]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -5267,6 +5303,7 @@ ldp x23, x22, [sp, #128] // 16-byte Folded Reload ldp x25, x24, [sp, #112] // 16-byte Folded Reload ldp x29, x30, [sp, #96] // 16-byte Folded Reload + str xzr, [sp, #96] // 8-byte Folded Spill add sp, sp, #176 .cfi_def_cfa_offset 0 hint #29 @@ -5806,8 +5843,9 @@ .cfi_def_cfa wsp, 64 ldp x19, x15, [sp, #48] // 16-byte Folded Reload ldp x21, x20, [sp, #32] // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload ldr x22, [sp, #16] // 8-byte Folded Reload - ldp x29, x30, [sp], #64 // 16-byte Folded Reload + str xzr, [sp, #64]! 
// 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state @@ -6025,7 +6063,8 @@ .LBB45_20: // %cleanup .cfi_def_cfa wsp, 32 ldp x19, x15, [sp, #16] // 16-byte Folded Reload - ldp x29, x30, [sp], #32 // 16-byte Folded Reload + ldp x29, x30, [sp] // 16-byte Folded Reload + str xzr, [sp, #32]! // 8-byte Folded Spill .cfi_def_cfa_offset 0 hint #29 .cfi_negate_ra_state
arm64 -fret-clean attempt