Bug 14231 - not_const_pp is causing memory reorder in clang without lto in arm
Summary: not_const_pp is causing memory reorder in clang without lto in arm
Status: RESOLVED FIXED
Alias: None
Product: Busybox
Classification: Unclassified
Component: Standard Compliance (show other bugs)
Version: 1.33.x
Hardware: Other Other
: P5 critical
Target Milestone: ---
Assignee: YU Jincheng
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2021-09-25 09:32 UTC by YU Jincheng
Modified: 2021-10-15 20:53 UTC (History)
1 user (show)

See Also:
Host:
Target:
Build:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description YU Jincheng 2021-09-25 09:32:33 UTC
In ash.c, there's `not_const_pp` which converts a const pointer to non-const one. However, this will cause memory reorder and leads to SIGSEV.

Here's the instruction after compiled init_G from ash.c:


    0x491624 <+32>:   bl     0x4472d4                  ; xzalloc at xfuncs_printf.c:70
    0x491628 <+36>:   bl     0x49e070                  ; OUTLINED_FUNCTION_1
    0x49162c <+40>:   mov    w9, #-0x1
    0x491630 <+44>:   ldr    x21, [x8]
    0x491634 <+48>:   str    x0, [x8]
    0x491638 <+52>:   add    x8, x21, #0x54            ; =0x54
->  0x49163c <+56>:   stp    x8, x8, [x21, #0x20] ;<--- crash point

Here, x8 is the register that stores the addr of `ash_ptr_to_globals_misc`. It's loaded first to x21 and then store from x0. And thus the reset instruction crashed since x21 is 0 (addr of `ash_ptr_to_globals_misc` before assigned by `xzalloc`).

I think the key problem is that in clang 9+, `not_const_pp` has a workaround that breaks the memory dependency and thus the compiler reorders the memory.
Comment 1 YU Jincheng 2021-09-25 11:34:31 UTC
Note that barrier() is not working since `ash_ptr_to_globals_misc` is const so the compiler assumes it will NEVER change and optimize like that. Chaning `ash_ptr_to_globals_misc` to non-const gracefully fixed this.
Comment 2 YU Jincheng 2021-09-25 11:59:00 UTC
Ok I finally find the comment about `-DBB_GLOBAL_CONST=""` and it fixes.
Comment 3 Denys Vlasenko 2021-09-25 17:11:07 UTC
(In reply to YU Jincheng from comment #2)
> Ok I finally find the comment about `-DBB_GLOBAL_CONST=""` and it fixes.

 * However, this may break on weird arches or toolchains. In this case,
 * set "-DBB_GLOBAL_CONST=''" in CONFIG_EXTRA_CFLAGS to disable
 * this optimization.

This is not a fix, it's more of a work-around: constant optimization reduces code size by _kilobytes_ here.

We need to find a way to prevent clang from reordering loads and stores here:

    0x491630 <+44>:   ldr    x21, [x8]
    0x491634 <+48>:   str    x0, [x8]

IOW: set ash_ptr_to_globals_misc to the needed value, THEN load it (or better yet, just use x0 since that's the value).

Can you research what would work for clang 9 here?

#define INIT_G_misc() do { \
        (*(struct globals_misc**)not_const_pp(&ash_ptr_to_globals_misc)) = xzalloc(sizeof(G_misc)); \
        barrier(); \
        savestatus = -1; \

Adding another "barrier();" _before_ the pointer assignment? Some other trick?
Comment 4 YU Jincheng 2021-09-26 08:09:38 UTC
I did have one trick.

```
#define INIT_G_misc() do { \
	(*(struct globals_misc**)not_const_pp(&ash_ptr_to_globals_misc)) = ({ \
		struct globals_misc *BB_GLOBAL_CONST ash_ptr_to_globals_misc = xzalloc(sizeof(G_misc));  \
		savestatus = -1; \
		curdir = nullstr; \
	    physdir = nullstr; \
	    trap_ptr = trap; \
		ash_ptr_to_globals_misc;});                                                                                                                                     \
} while (0)
```

And the result is:
.text:0000000000491608                 BL              xzalloc
.text:000000000049160C                 ADRP            X21, #ash_ptr_to_globals_misc_ptr@PAGE
.text:0000000000491610                 LDR             X21, [X21,#ash_ptr_to_globals_misc_ptr@PAGEOFF]
.text:0000000000491614                 MOV             W8, #0xFFFFFFFF
.text:0000000000491618                 ADD             X9, X0, #0x54 ; 'T'
.text:000000000049161C                 ADD             X10, X0, #0xE8
.text:0000000000491620                 STR             W8, [X0,#8]
.text:0000000000491624                 MOV             X8, X21
.text:0000000000491628                 STP             X9, X9, [X0,#0x20]
.text:000000000049162C                 STR             X10, [X0,#0x2F0]
.text:0000000000491630                 STR             X0, [X8]
.text:0000000000491634                 MOV             W0, #0x220
Comment 5 YU Jincheng 2021-09-26 08:09:39 UTC
I did have one trick.

```
#define INIT_G_misc() do { \
	(*(struct globals_misc**)not_const_pp(&ash_ptr_to_globals_misc)) = ({ \
		struct globals_misc *BB_GLOBAL_CONST ash_ptr_to_globals_misc = xzalloc(sizeof(G_misc));  \
		savestatus = -1; \
		curdir = nullstr; \
	    physdir = nullstr; \
	    trap_ptr = trap; \
		ash_ptr_to_globals_misc;});                                                                                                                                     \
} while (0)
```

And the result is:
.text:0000000000491608                 BL              xzalloc
.text:000000000049160C                 ADRP            X21, #ash_ptr_to_globals_misc_ptr@PAGE
.text:0000000000491610                 LDR             X21, [X21,#ash_ptr_to_globals_misc_ptr@PAGEOFF]
.text:0000000000491614                 MOV             W8, #0xFFFFFFFF
.text:0000000000491618                 ADD             X9, X0, #0x54 ; 'T'
.text:000000000049161C                 ADD             X10, X0, #0xE8
.text:0000000000491620                 STR             W8, [X0,#8]
.text:0000000000491624                 MOV             X8, X21
.text:0000000000491628                 STP             X9, X9, [X0,#0x20]
.text:000000000049162C                 STR             X10, [X0,#0x2F0]
.text:0000000000491630                 STR             X0, [X8]
.text:0000000000491634                 MOV             W0, #0x220
Comment 6 YU Jincheng 2021-09-27 17:18:55 UTC
I have found a trick that fixes this issue. Will submit a PR.
Comment 7 YU Jincheng 2021-09-28 02:47:37 UTC
I am having some trouble about subscribing the mail list and I cannot submit the patch to it so I decided to submit here.
===
From: YU Jincheng <shana@zju.edu.cn>

- clang 9+ will load the const pointer first before the const
  pointer assignment trick and thus cause null pointer defer.
- This patch creates a shadow variable to prevent the above from
  happening for clang 9+.
- Also, this patch applies `BB_GLOBAL_CONST` to all variables
  using the same trick, allowing archs or toolchains having the
  same issue to bypass this trick correctly.

This patch fixes https://bugs.busybox.net/show_bug.cgi?id=14231
and https://bugs.busybox.net/show_bug.cgi?id=14236

Co-authored-by: canyie <31466456+canyie@users.noreply.github.com>
Signed-off-by: YU Jincheng <shana@zju.edu.cn>
---
 coreutils/test.c  |  7 ++-----
 include/libbb.h   | 34 +++++++++++++++++++++++++++++-----
 libbb/appletlib.c |  3 +--
 libbb/lineedit.c  |  7 ++-----
 shell/ash.c       | 23 +++--------------------
 5 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/coreutils/test.c b/coreutils/test.c
index 7c6574334..3a91de407 100644
--- a/coreutils/test.c
+++ b/coreutils/test.c
@@ -435,7 +435,7 @@ struct test_statics {
 };
 
 /* See test_ptr_hack.c */
-extern struct test_statics *const test_ptr_to_statics;
+extern struct test_statics *BB_GLOBAL_CONST test_ptr_to_statics;
 
 #define S (*test_ptr_to_statics)
 #define args            (S.args         )
@@ -445,10 +445,7 @@ extern struct test_statics *const test_ptr_to_statics;
 #define bash_test2      (S.bash_test2   )
 #define leaving         (S.leaving      )
 
-#define INIT_S() do { \
-	(*(struct test_statics**)not_const_pp(&test_ptr_to_statics)) = xzalloc(sizeof(S)); \
-	barrier(); \
-} while (0)
+#define INIT_S() ASSIGN_CONST_PTR(test_ptr_to_statics, xzalloc(sizeof(S)))
 #define DEINIT_S() do { \
 	free(group_array); \
 	free(test_ptr_to_statics); \
diff --git a/include/libbb.h b/include/libbb.h
index dfcaa05ec..cdc8bd271 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -365,10 +365,24 @@ struct BUG_off_t_size_is_misdetected {
 #endif
 #endif
 
+/* We use a trick to have more optimized code (fewer pointer reloads
+ * and reduce binary size by a few kilobytes) like:
+ *  ash.c:   extern struct globals *const ash_ptr_to_globals;
+ *  ash_ptr_hack.c: struct globals *ash_ptr_to_globals;
+ * This way, compiler in ash.c knows the pointer can not change.
+ *
+ * However, this may break on weird arches or toolchains. In this case,
+ * set "-DBB_GLOBAL_CONST=''" in CONFIG_EXTRA_CFLAGS to disable
+ * this optimization.
+ */
+#ifndef BB_GLOBAL_CONST
+# define BB_GLOBAL_CONST const
+#endif
+
 #if defined(errno)
 /* If errno is a define, assume it's "define errno (*__errno_location())"
  * and we will cache it's result in this variable */
-extern int *const bb_errno;
+extern int *BB_GLOBAL_CONST bb_errno;
 #undef errno
 #define errno (*bb_errno)
 #define bb_cached_errno_ptr 1
@@ -2284,16 +2298,26 @@ static ALWAYS_INLINE void* not_const_pp(const void *p)
 	);
 	return pp;
 }
+#ifndef ASSIGN_CONST_PTR
+#define ASSIGN_CONST_PTR(p, v) \
+_Pragma("clang diagnostic push"); \
+_Pragma("clang diagnostic ignored \"-Wshadow\""); \
+	__typeof__(p) p = ({*(void**)not_const_pp(&p) = (void*)(v);});
+_Pragma("clang diagnostic pop")
+#endif
 #else
 static ALWAYS_INLINE void* not_const_pp(const void *p) { return (void*)p; }
+#ifndef ASSIGN_CONST_PTR
+#define ASSIGN_CONST_PTR(p, v) do { \
+	(*(void**)not_const_pp(&p)) = (void*)(v); \
+	barrier(); \
+} while (0)
+#endif
 #endif
 
 /* At least gcc 3.4.6 on mipsel system needs optimization barrier */
 #define barrier() __asm__ __volatile__("":::"memory")
-#define SET_PTR_TO_GLOBALS(x) do { \
-	(*(struct globals**)not_const_pp(&ptr_to_globals)) = (void*)(x); \
-	barrier(); \
-} while (0)
+#define SET_PTR_TO_GLOBALS(x) ASSIGN_CONST_PTR(ptr_to_globals, x)
 
 #define FREE_PTR_TO_GLOBALS() do { \
 	if (ENABLE_FEATURE_CLEAN_UP) { \
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 14be33603..8b4c0f0e7 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -247,8 +247,7 @@ void lbb_prepare(const char *applet
 		IF_FEATURE_INDIVIDUAL(, char **argv))
 {
 #ifdef bb_cached_errno_ptr
-	(*(int **)not_const_pp(&bb_errno)) = get_perrno();
-	barrier();
+	ASSIGN_CONST_PTR(bb_errno, get_perrno());
 #endif
 	applet_name = applet;
 
diff --git a/libbb/lineedit.c b/libbb/lineedit.c
index a7a3ee103..73194282c 100644
--- a/libbb/lineedit.c
+++ b/libbb/lineedit.c
@@ -192,7 +192,7 @@ struct lineedit_statics {
 };
 
 /* See lineedit_ptr_hack.c */
-extern struct lineedit_statics *const lineedit_ptr_to_statics;
+extern struct lineedit_statics *BB_GLOBAL_CONST lineedit_ptr_to_statics;
 
 #define S (*lineedit_ptr_to_statics)
 #define state            (S.state           )
@@ -213,10 +213,7 @@ extern struct lineedit_statics *const lineedit_ptr_to_statics;
 #define newdelflag       (S.newdelflag      )
 #define delbuf           (S.delbuf          )
 
-#define INIT_S() do { \
-	(*(struct lineedit_statics**)not_const_pp(&lineedit_ptr_to_statics)) = xzalloc(sizeof(S)); \
-	barrier(); \
-} while (0)
+#define INIT_S() ASSIGN_CONST_PTR(lineedit_ptr_to_statics, xzalloc(sizeof(S)))
 
 static void deinit_S(void)
 {
diff --git a/shell/ash.c b/shell/ash.c
index 4bc4f55d0..c8a20ab94 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -303,20 +303,6 @@ typedef long arith_t;
 # error "Do not even bother, ash will not run on NOMMU machine"
 #endif
 
-/* We use a trick to have more optimized code (fewer pointer reloads):
- *  ash.c:   extern struct globals *const ash_ptr_to_globals;
- *  ash_ptr_hack.c: struct globals *ash_ptr_to_globals;
- * This way, compiler in ash.c knows the pointer can not change.
- *
- * However, this may break on weird arches or toolchains. In this case,
- * set "-DBB_GLOBAL_CONST=''" in CONFIG_EXTRA_CFLAGS to disable
- * this optimization.
- */
-#ifndef BB_GLOBAL_CONST
-# define BB_GLOBAL_CONST const
-#endif
-
-
 /* ============ Hash table sizes. Configurable. */
 
 #define VTABSIZE 39
@@ -518,8 +504,7 @@ extern struct globals_misc *BB_GLOBAL_CONST ash_ptr_to_globals_misc;
 #define random_gen  (G_misc.random_gen )
 #define backgndpid  (G_misc.backgndpid )
 #define INIT_G_misc() do { \
-	(*(struct globals_misc**)not_const_pp(&ash_ptr_to_globals_misc)) = xzalloc(sizeof(G_misc)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_misc, xzalloc(sizeof(G_misc))); \
 	savestatus = -1; \
 	curdir = nullstr; \
 	physdir = nullstr; \
@@ -1597,8 +1582,7 @@ extern struct globals_memstack *BB_GLOBAL_CONST ash_ptr_to_globals_memstack;
 #define g_stacknleft (G_memstack.g_stacknleft)
 #define stackbase    (G_memstack.stackbase   )
 #define INIT_G_memstack() do { \
-	(*(struct globals_memstack**)not_const_pp(&ash_ptr_to_globals_memstack)) = xzalloc(sizeof(G_memstack)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_memstack, xzalloc(sizeof(G_memstack))); \
 	g_stackp = &stackbase; \
 	g_stacknxt = stackbase.space; \
 	g_stacknleft = MINSIZE; \
@@ -2229,8 +2213,7 @@ extern struct globals_var *BB_GLOBAL_CONST ash_ptr_to_globals_var;
 #endif
 #define INIT_G_var() do { \
 	unsigned i; \
-	(*(struct globals_var**)not_const_pp(&ash_ptr_to_globals_var)) = xzalloc(sizeof(G_var)); \
-	barrier(); \
+	ASSIGN_CONST_PTR(ash_ptr_to_globals_var, xzalloc(sizeof(G_var))); \
 	for (i = 0; i < ARRAY_SIZE(varinit_data); i++) { \
 		varinit[i].flags    = varinit_data[i].flags; \
 		varinit[i].var_text = varinit_data[i].var_text; \
-- 
2.25.1
Comment 8 Denys Vlasenko 2021-10-15 20:53:15 UTC
Fixed in git