Index | Thread | Search

From:
Greg Steuck <gnezdo@openbsd.org>
Subject:
Re: arm64 -fret-clean attempt
To:
"Theo de Raadt" <deraadt@openbsd.org>
Cc:
tech@cvs.openbsd.org
Date:
Sat, 13 Jul 2024 10:13:53 +0200

Download raw body.

Thread
"Theo de Raadt" <deraadt@openbsd.org> writes:

> I've been trying to write -fret-clean for arm64.
>
> On a return-stack architecture like amd64, the callee has to clean up the
> word on the stack upon return.
>
> arm64, like some other risc architectures, is a link-register architecture.
> In this case, the return address is saved in some temporary location by
> the caller, who loads it into the link register before returning.  Before
> that moment, the caller has to clean it up.
>
> After running around in the swamp for a while, I came up with this variation
> that inserts the correct instruction at the correct place.  It works, as long
> as the optimizer is turned off with -O0.

I joined the swamp race and found some cases where I don't understand
the transformation happening when compiling with ret-clean. As it
happens, a crash can be reproduced by compiling tmux. It segv's when
options.c is compiled with the new flag, so I started staring at the
assembly output for these two cases:

% ~/s/llvm-project/build/bin/clang -mllvm -aarch64-ret-clean -O2 -S /usr/src/usr.bin/tmux/options.c -o bad-direct.s
% ~/s/llvm-project/build/bin/clang -O2 -S /usr/src/usr.bin/tmux/options.c -o good-direct.s

In case somebody wants to stare at the differences without waiting for
clang to build, behold:

--- good-direct.s	Sat Jul 13 00:30:39 2024
+++ bad-direct.s	Sat Jul 13 00:29:50 2024
@@ -40,7 +40,8 @@
 	stp	xzr, x19, [x0]
 	.cfi_def_cfa wsp, 32
 	ldp	x19, x15, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -136,7 +137,8 @@
 	mov	x0, x19
 	.cfi_def_cfa wsp, 32
 	ldp	x20, x19, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -607,8 +609,9 @@
 	mov	x0, x19
 	.cfi_def_cfa wsp, 48
 	ldp	x20, x19, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x21, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -857,7 +860,8 @@
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -922,8 +926,9 @@
 .LBB8_4:                                // %while.end
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -976,7 +981,8 @@
 .LBB9_2:                                // %if.end
 	.cfi_def_cfa wsp, 32
 	ldp	x19, x15, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -1267,8 +1273,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -1370,7 +1377,8 @@
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -1554,8 +1562,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -1589,8 +1598,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -1841,6 +1851,7 @@
 	ldp	x21, x20, [sp, #48]             // 16-byte Folded Reload
 	ldp	x23, x22, [sp, #32]             // 16-byte Folded Reload
 	ldp	x29, x30, [sp, #16]             // 16-byte Folded Reload
+	str	xzr, [sp, #16]                  // 8-byte Folded Spill
 	add	sp, sp, #80
 	.cfi_def_cfa_offset 0
 	hint	#29
@@ -1979,8 +1990,9 @@
 	bl	xasprintf
 	ldr	x0, [x29, #24]
 	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x15, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -2034,8 +2046,9 @@
 	ldr	x0, [x9, x8, lsl #3]
 	.cfi_def_cfa wsp, 32
 .LBB14_8:                               // %sw.bb
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x15, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -2175,8 +2188,9 @@
 .LBB18_5:                               // %cleanup
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -2240,8 +2254,9 @@
 	mov	x0, x19
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -2644,7 +2659,8 @@
 	mov	x0, x19
 	.cfi_def_cfa wsp, 32
 	ldp	x20, x19, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -2969,8 +2985,9 @@
 .LBB21_44:                              // %options_array_RB_INSERT.exit
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3036,8 +3053,9 @@
 // %bb.5:                               // %if.then10
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3051,8 +3069,9 @@
 	.cfi_restore_state
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3354,8 +3373,9 @@
 	ldr	x0, [x1]
 	mov	w1, wzr
 	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x15, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3398,8 +3418,9 @@
 	ldr	x0, [x1]
 	.cfi_def_cfa wsp, 32
 .LBB30_10:                              // %if.end54
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x15, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3451,8 +3472,9 @@
 .LBB30_19:                              // %sw.epilog
 	ldr	x0, [x29, #24]
 	.cfi_def_cfa wsp, 32
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x15, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3563,7 +3585,8 @@
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3652,8 +3675,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3797,8 +3821,9 @@
 	ldp	x21, x20, [sp, #64]             // 16-byte Folded Reload
 	ldp	x23, x22, [sp, #48]             // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x26, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #96             // 16-byte Folded Reload
+	str	xzr, [sp, #96]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3839,8 +3864,9 @@
 	ldp	x21, x20, [sp, #64]             // 16-byte Folded Reload
 	ldp	x23, x22, [sp, #48]             // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x26, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #96             // 16-byte Folded Reload
+	str	xzr, [sp, #96]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -3930,8 +3956,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4002,8 +4029,9 @@
 	ldr	x0, [x0, #24]
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4085,8 +4113,9 @@
 	ldr	x0, [x0, #24]
 	.cfi_def_cfa wsp, 48
 	ldp	x19, x15, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x20, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #48             // 16-byte Folded Reload
+	str	xzr, [sp, #48]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4240,6 +4269,7 @@
 	ldp	x21, x20, [sp, #272]            // 16-byte Folded Reload
 	ldp	x28, x22, [sp, #256]            // 16-byte Folded Reload
 	ldp	x29, x30, [sp, #240]            // 16-byte Folded Reload
+	str	xzr, [sp, #240]                 // 8-byte Folded Spill
 	add	sp, sp, #304
 	.cfi_def_cfa_offset 0
 	hint	#29
@@ -4374,8 +4404,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4475,8 +4506,9 @@
 	ldp	x23, x22, [sp, #64]             // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #48]             // 16-byte Folded Reload
 	ldp	x27, x26, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x28, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #112            // 16-byte Folded Reload
+	str	xzr, [sp, #112]!                // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4589,8 +4621,9 @@
 	ldp	x23, x22, [sp, #64]             // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #48]             // 16-byte Folded Reload
 	ldp	x27, x26, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x28, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #112            // 16-byte Folded Reload
+	str	xzr, [sp, #112]!                // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4748,8 +4781,9 @@
 	ldp	x21, x20, [sp, #64]             // 16-byte Folded Reload
 	ldp	x23, x22, [sp, #48]             // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x26, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #96             // 16-byte Folded Reload
+	str	xzr, [sp, #96]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -4981,8 +5015,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -5067,7 +5102,8 @@
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
 	ldp	x23, x22, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -5267,6 +5303,7 @@
 	ldp	x23, x22, [sp, #128]            // 16-byte Folded Reload
 	ldp	x25, x24, [sp, #112]            // 16-byte Folded Reload
 	ldp	x29, x30, [sp, #96]             // 16-byte Folded Reload
+	str	xzr, [sp, #96]                  // 8-byte Folded Spill
 	add	sp, sp, #176
 	.cfi_def_cfa_offset 0
 	hint	#29
@@ -5806,8 +5843,9 @@
 	.cfi_def_cfa wsp, 64
 	ldp	x19, x15, [sp, #48]             // 16-byte Folded Reload
 	ldp	x21, x20, [sp, #32]             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
 	ldr	x22, [sp, #16]                  // 8-byte Folded Reload
-	ldp	x29, x30, [sp], #64             // 16-byte Folded Reload
+	str	xzr, [sp, #64]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state
@@ -6025,7 +6063,8 @@
 .LBB45_20:                              // %cleanup
 	.cfi_def_cfa wsp, 32
 	ldp	x19, x15, [sp, #16]             // 16-byte Folded Reload
-	ldp	x29, x30, [sp], #32             // 16-byte Folded Reload
+	ldp	x29, x30, [sp]                  // 16-byte Folded Reload
+	str	xzr, [sp, #32]!                 // 8-byte Folded Spill
 	.cfi_def_cfa_offset 0
 	hint	#29
 	.cfi_negate_ra_state