From e3f166b84fc442150b7eb03ec02828afde265daf Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 6 Jan 2025 22:54:12 +0800 Subject: [PATCH] [CIR][CIRGen] Improve emission for array of unions (#1236) Close https://github.com/llvm/clangir/issues/1185 The patch itself seems slightly ad-hoc. As the issue tracked by https://github.com/llvm/clangir/issues/1178, the fundamental solution may be to introduce two type systems to solve the inconsistent semantics for Union between LLVM IR and CIR. This will be great to handle other inconsistent semantics between LLVM IR and CIR if any. Back to the patch itself, although the code looks not good initially to me too. But I feel it may be a good workaround since clang/test/CIR/Lowering/union-array.c is an example for array of unions directly and clang/test/CIR/Lowering/nested-union-array.c gives an example for array of unions indirectly (array of structs which contain unions). So I feel we've already covered all the cases. And generally it should be good to use some simple and solid workaround before we introduce the formal full solution. --- clang/lib/CIR/CodeGen/CIRGenExprConst.cpp | 48 ++++++++++++++++++++++- clang/test/CIR/CodeGen/union-array.c | 22 +++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGen/union-array.c diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp index dbd78284349d..3eb8627084f4 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConst.cpp @@ -1015,6 +1015,50 @@ class ConstExprEmitter return {}; } + auto desiredType = CGM.getTypes().ConvertType(T); + // FIXME(cir): A hack to handle the emission of arrays of unions directly. + // See clang/test/CIR/CodeGen/union-array.c and + // clang/test/CIR/Lowering/nested-union-array.c for example. The root + // cause of these problems is CIR handles union differently than LLVM IR. + // So we can't fix the problem fundamentally by mocking LLVM's handling for + // unions. In LLVM, the union is basically a struct with the largest member + // of the union and consumers cast the union arbitrarily according to their + // needs. But in CIR, we tried to express union semantics properly. This is + // a fundamental difference. + // + // Concretely, for the problem here, if we're constructing the initializer + // for the array of unions, we can't even assume the type of the elements in + // the initializer are the same! It is odd that we can have an array with + // different element types. Here we just pretend it is fine by checking if + // we're constructing an array for an array of unions. If we didn't do so, + // we may meet problems during lowering to LLVM. To solve the problem, we + // may need to introduce 2 type systems for CIR: one for the CIR itself and + // one for lowering. e.g., we can compare the type of CIR during CIRGen, + // analysis and transformations without worrying the concerns here. And + // lower to LLVM IR (or anyother dialects) with the proper type. + // + // (Although the idea to make CIR's type system self contained and generate + // LLVM's + // types in later passes look fine, it has engineering level concern that + // it will make the skeleton of CIRGen to be diverged from the traditional + // CodeGen.) + // + // Besides union, there are other differences between CIR and LLVM's type + // system. e.g., LLVM's pointer types are opaque while CIR has concrete + // pointer types. + bool isDesiredArrayOfUnionDirectly = [&]() { + auto desiredArrayType = dyn_cast(desiredType); + if (!desiredArrayType) + return false; + + auto elementStructType = + dyn_cast(desiredArrayType.getEltType()); + if (!elementStructType) + return false; + + return elementStructType.isUnion(); + }(); + // Emit initializer elements as MLIR attributes and check for common type. mlir::Type CommonElementType; for (unsigned i = 0; i != NumInitableElts; ++i) { @@ -1024,10 +1068,12 @@ class ConstExprEmitter return {}; if (i == 0) CommonElementType = C.getType(); + else if (isDesiredArrayOfUnionDirectly && + C.getType() != CommonElementType) + CommonElementType = nullptr; Elts.push_back(std::move(C)); } - auto desiredType = CGM.getTypes().ConvertType(T); auto typedFiller = llvm::dyn_cast_or_null(Filler); if (Filler && !typedFiller) llvm_unreachable("We shouldn't be receiving untyped attrs here"); diff --git a/clang/test/CIR/CodeGen/union-array.c b/clang/test/CIR/CodeGen/union-array.c new file mode 100644 index 000000000000..8ac8264b159d --- /dev/null +++ b/clang/test/CIR/CodeGen/union-array.c @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +typedef struct { + char a; +} S_1; + +typedef struct { + long a, b; +} S_2; + +typedef union { + S_1 a; + S_2 b; +} U; + +void foo() { U arr[2] = {{.b = {1, 2}}, {.a = {1}}}; } + +// CIR: cir.const #cir.const_struct<{#cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s64i, #cir.int<2> : !s64i}> : {{.*}}}> : {{.*}}, #cir.const_struct<{#cir.const_struct<{#cir.int<1> : !s8i}> : {{.*}}, #cir.const_array<[#cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i, #cir.zero : !u8i]> : !cir.array}> +// LLVM: store { { %struct.S_2 }, { %struct.S_1, [15 x i8] } } { { %struct.S_2 } { %struct.S_2 { i64 1, i64 2 } }, { %struct.S_1, [15 x i8] } { %struct.S_1 { i8 1 }, [15 x i8] zeroinitializer } }