From 2bbc0301d5c7127cbd45b88c63ed946a47a2aacd Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 3 Nov 2025 01:12:57 +0000 Subject: [PATCH 01/31] LoongArch: Implement NOCE_CONVERSION_PROFITABLE_P to be compatible Signed-off-by: Peng Fan --- gcc/config/loongarch/loongarch.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index c90fc726f05..85ea8b6f348 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -11177,6 +11177,20 @@ loongarch_c_mode_for_suffix (char suffix) return VOIDmode; } +static bool +loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) +{ + enum rtx_class cls; + for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn)) + { + cls = GET_RTX_CLASS (GET_CODE (SET_SRC (single_set (insn)))); + if ( cls == RTX_COMPARE || cls == RTX_COMM_COMPARE) + return false; + } + + return default_noce_conversion_profitable_p (seq, if_info); +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -11448,6 +11462,9 @@ loongarch_c_mode_for_suffix (char suffix) #undef TARGET_C_MODE_FOR_SUFFIX #define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix +#undef TARGET_NOCE_CONVERSION_PROFITABLE_P +#define TARGET_NOCE_CONVERSION_PROFITABLE_P loongarch_noce_conversion_profitable_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-loongarch.h" -- Gitee From 4aff955340f5a7502e752b8adc67284434dc61d7 Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Wed, 29 Oct 2025 16:07:35 +0800 Subject: [PATCH 02/31] LoongArch: Only allow valid binary op when optimize conditional move It is wrong that optimize from `if (cond) dest op= 1 << shift` to `dest op= (cond ? 1 : 0) << shift` when `dest op 0 != dest`. Like `and`, `mul` or `div`. And in this optimization `mul` and `div` is optimized to shift. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_expand_conditional_move): Only allow valid binary op when optimize conditional move. gcc/testsuite/ChangeLog: * gcc.target/loongarch/conditional-move-opt-1.c: Remove mul. * gcc.target/loongarch/conditional-move-opt-2.c: Remove and. * gcc.target/loongarch/conditional-move-opt-3.c: New test. Co-Authored-By: Peng Fan --- gcc/config/loongarch/loongarch.cc | 26 ++++++++++++++++--- .../loongarch/conditional-move-opt-1.c | 4 +-- .../loongarch/conditional-move-opt-2.c | 2 +- .../loongarch/conditional-move-opt-3.c | 14 ++++++++++ 4 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 85ea8b6f348..b44d52adff1 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -5426,12 +5426,32 @@ loongarch_expand_conditional_move (rtx *operands) } } + auto is_binary_op_0_keep_orig = [](enum rtx_code code) + { + switch (code) + { + case PLUS: + case MINUS: + case IOR: + case XOR: + case ROTATE: + case ROTATERT: + case ASHIFT: + case ASHIFTRT: + case LSHIFTRT: + return true; + default: + return false; + } + }; + /* Check if the optimization conditions are met. */ if (value_if_true_insn && value_if_false_insn - /* Make sure that value_if_false and var are the same. */ - && BINARY_P (value_if_true_insn_src - = SET_SRC (single_set (value_if_true_insn))) + /* Make sure that the orig value OP 0 keep orig. */ + && (value_if_true_insn_src + = SET_SRC (single_set (value_if_true_insn))) + && is_binary_op_0_keep_orig ( GET_CODE (value_if_true_insn_src)) /* Make sure that both value_if_true and value_if_false has the same var. */ && rtx_equal_p (XEXP (value_if_true_insn_src, 0), diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c index ed13471aa90..47802aa9688 100644 --- a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c @@ -27,7 +27,7 @@ void test_lt () { if (lm < ln) - lr *= (1 << 16); + lr += (1 << 16); lr += lm; } @@ -35,7 +35,7 @@ void test_le () { if (lm <= ln) - lr = lm * ((long)1 << 32); + lr = lm + ((long)1 << 32); else lr = lm; lr += lm; diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c index ac72d4d933a..743fd5e670e 100644 --- a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c @@ -29,7 +29,7 @@ void test_lez () { if (lm <= 0) - lr &= (1 << 16); + lr |= (1 << 16); lr += lm; } diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c new file mode 100644 index 00000000000..95887980cc5 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "maskeqz" } } */ +/* { dg-final { scan-assembler "masknez" } } */ + +extern long lm, ln, lr; + +void +test_and () +{ + if (lm < 0) + lr &= (1 << 16); + lr += lm; +} -- Gitee From 3261043e01f17c4175b39bd3937d01a68bc326f9 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 8 Jul 2025 14:39:11 +0800 Subject: [PATCH 03/31] lra: Reallow reloading user hard registers if the insn is not asm [PR 120983] The PR 87600 fix has disallowed reloading user hard registers to resolve earlyclobber-induced conflict. However before reload, recog completely ignores the constraints of insns, so the RTL passes may produce insns where some user hard registers violate an earlyclobber. Then we'll get an ICE without reloading them, like what we are recently encountering in LoongArch test suite. IIUC "recog does not look at constraints until reload" has been a well-established rule in GCC for years and I don't have enough skill to challange it. So reallow reloading user hard registers (but still disallow doing so for asm) to fix the ICE. gcc/ChangeLog: PR rtl-optimization/120983 * lra-constraints.cc (process_alt_operands): Allow reloading user hard registers unless the insn is an asm. --- gcc/lra-constraints.cc | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index d92ab76908c..6a003d763a6 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -2304,14 +2304,15 @@ process_alt_operands (int only_alternative) if (curr_static_id->operand[nop].type == OP_INOUT || curr_static_id->operand[m].type == OP_INOUT) break; - /* Operands don't match. If the operands are - different user defined explicit hard + /* Operands don't match. For asm if the operands + are different user defined explicit hard registers, then we cannot make them match when one is early clobber operand. */ if ((REG_P (*curr_id->operand_loc[nop]) || SUBREG_P (*curr_id->operand_loc[nop])) && (REG_P (*curr_id->operand_loc[m]) - || SUBREG_P (*curr_id->operand_loc[m]))) + || SUBREG_P (*curr_id->operand_loc[m])) + && INSN_CODE (curr_insn) < 0) { rtx nop_reg = *curr_id->operand_loc[nop]; if (SUBREG_P (nop_reg)) @@ -3129,19 +3130,15 @@ process_alt_operands (int only_alternative) first_conflict_j = j; last_conflict_j = j; /* Both the earlyclobber operand and conflicting operand - cannot both be user defined hard registers. */ + cannot both be user defined hard registers for asm. + Let curr_insn_transform diagnose it. */ if (HARD_REGISTER_P (operand_reg[i]) && REG_USERVAR_P (operand_reg[i]) && operand_reg[j] != NULL_RTX && HARD_REGISTER_P (operand_reg[j]) - && REG_USERVAR_P (operand_reg[j])) - { - /* For asm, let curr_insn_transform diagnose it. */ - if (INSN_CODE (curr_insn) < 0) + && REG_USERVAR_P (operand_reg[j]) + && INSN_CODE (curr_insn) < 0) return false; - fatal_insn ("unable to generate reloads for " - "impossible constraints:", curr_insn); - } } if (last_conflict_j < 0) continue; -- Gitee From a7e2ee8841d8213b09be9b3413cfda33158a18b5 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 15 Jul 2025 03:01:12 +0800 Subject: [PATCH 04/31] LoongArch: Fix wrong code generated by TARGET_VECTORIZE_VEC_PERM_CONST [PR121064] When TARGET_VECTORIZE_VEC_PERM_CONST is called, target may be the same pseudo as op0 and/or op1. Loading the selector into target would clobber the input, producing wrong code like vld $vr0, $t0 vshuf.w $vr0, $vr0, $vr1 So don't load the selector into d->target, use a new pseudo to hold the selector instead. The reload pass will load the pseudo for selector and the pseudo for target into the same hard register (following our constraint '0' on the shuf instructions) anyway. gcc/ChangeLog: PR target/121064 * config/loongarch/lsx.md (lsx_vshuf_): Add '@' to generate a mode-aware helper. Use as the mode of the operand 1 (selector). * config/loongarch/lasx.md (lasx_xvshuf_): Likewise. * config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Create a new pseudo for the selector. Use the mode-aware helper to simplify the code. (loongarch_expand_vec_perm_const): Likewise. gcc/testsuite/ChangeLog: PR target/121064 * gcc.target/loongarch/pr121064.c: New test. --- gcc/config/loongarch/lasx.md | 4 +- gcc/config/loongarch/loongarch.cc | 126 +++++------------- gcc/config/loongarch/lsx.md | 4 +- gcc/testsuite/gcc.target/loongarch/pr121064.c | 38 ++++++ 4 files changed, 73 insertions(+), 99 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121064.c diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 73ca7caaaa0..15142f8949e 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -2156,9 +2156,9 @@ [(set_attr "type" "simd_int_arith") (set_attr "mode" "")]) -(define_insn "lasx_xvshuf_" +(define_insn "@lasx_xvshuf_" [(set (match_operand:LASX_DWH 0 "register_operand" "=f") - (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0") + (unspec:LASX_DWH [(match_operand: 1 "register_operand" "0") (match_operand:LASX_DWH 2 "register_operand" "f") (match_operand:LASX_DWH 3 "register_operand" "f")] UNSPEC_LASX_XVSHUF))] diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index b44d52adff1..883db986aae 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -8390,7 +8390,7 @@ static bool loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) { int i; - rtx target, op0, op1, sel, tmp; + rtx target, op0, op1; rtx rperm[MAX_VECT_LEN]; if (GET_MODE_SIZE (d->vmode) == 16) @@ -8409,47 +8409,23 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) for (i = 0; i < d->nelt; i += 1) rperm[i] = GEN_INT (d->perm[i]); - if (d->vmode == E_V2DFmode) - { - sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm)); - tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0); - emit_move_insn (tmp, sel); - } - else if (d->vmode == E_V4SFmode) - { - sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm)); - tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0); - emit_move_insn (tmp, sel); - } + machine_mode sel_mode = related_int_vector_mode (d->vmode) + .require (); + rtvec sel_v = gen_rtvec_v (d->nelt, rperm); + + /* Despite vshuf.* (except vshuf.b) needs sel == target, we cannot + load sel into target right now: here we are dealing with + pseudo regs, and target may be the same pseudo as one of op0 + or op1. Then we'd clobber the input. Instead, we use a new + pseudo reg here. The reload pass will look at the constraint + of vshuf.* and move sel into target first if needed. */ + rtx sel = force_reg (sel_mode, + gen_rtx_CONST_VECTOR (sel_mode, sel_v)); + + if (d->vmode == E_V16QImode) + emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); else - { - sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm)); - emit_move_insn (d->target, sel); - } - - switch (d->vmode) - { - case E_V2DFmode: - emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0)); - break; - case E_V2DImode: - emit_insn (gen_lsx_vshuf_d (target, target, op1, op0)); - break; - case E_V4SFmode: - emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0)); - break; - case E_V4SImode: - emit_insn (gen_lsx_vshuf_w (target, target, op1, op0)); - break; - case E_V8HImode: - emit_insn (gen_lsx_vshuf_h (target, target, op1, op0)); - break; - case E_V16QImode: - emit_insn (gen_lsx_vshuf_b (target, op1, op0, target)); - break; - default: - break; - } + emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0)); return true; } @@ -9445,7 +9421,7 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) bool flag = false; unsigned int i; unsigned char idx; - rtx target, op0, op1, sel, tmp; + rtx target, op0, op1; rtx rperm[MAX_VECT_LEN]; unsigned int remapped[MAX_VECT_LEN]; unsigned char perm2[MAX_VECT_LEN]; @@ -9625,63 +9601,23 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d) expand_perm_const_end: if (flag) { - /* Copy selector vector from memory to vector register for later insn - gen function. - If vector's element in floating point value, we cannot fit - selector argument into insn gen function directly, because of the - insn template definition. As a solution, generate a integral mode - subreg of target, then copy selector vector (that is in integral - mode) to this subreg. */ - switch (d->vmode) - { - case E_V4DFmode: - sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, - rperm)); - tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0); - emit_move_insn (tmp, sel); - break; - case E_V8SFmode: - sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, - rperm)); - tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0); - emit_move_insn (tmp, sel); - break; - default: - sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, - rperm)); - emit_move_insn (d->target, sel); - break; - } - target = d->target; op0 = d->op0; op1 = d->one_vector_p ? d->op0 : d->op1; - /* We FINALLY can generate xvshuf.* insn. */ - switch (d->vmode) - { - case E_V4DFmode: - emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0)); - break; - case E_V4DImode: - emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0)); - break; - case E_V8SFmode: - emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0)); - break; - case E_V8SImode: - emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0)); - break; - case E_V16HImode: - emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0)); - break; - case E_V32QImode: - emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target)); - break; - default: - gcc_unreachable (); - break; - } + machine_mode sel_mode = related_int_vector_mode (d->vmode) + .require (); + rtvec sel_v = gen_rtvec_v (d->nelt, rperm); + + /* See the comment in loongarch_expand_lsx_shuffle for why + we don't simply use a SUBREG to pun target. */ + rtx sel = force_reg (sel_mode, + gen_rtx_CONST_VECTOR (sel_mode, sel_v)); + + if (d->vmode == E_V32QImode) + emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel)); + else + emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0)); return true; } diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 8cf7ad917ec..b1bccccb5e5 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -561,9 +561,9 @@ DONE; }) -(define_insn "lsx_vshuf_" +(define_insn "@lsx_vshuf_" [(set (match_operand:LSX_DWH 0 "register_operand" "=f") - (unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0") + (unspec:LSX_DWH [(match_operand: 1 "register_operand" "0") (match_operand:LSX_DWH 2 "register_operand" "f") (match_operand:LSX_DWH 3 "register_operand" "f")] UNSPEC_LSX_VSHUF))] diff --git a/gcc/testsuite/gcc.target/loongarch/pr121064.c b/gcc/testsuite/gcc.target/loongarch/pr121064.c new file mode 100644 index 00000000000..a466c7abc70 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr121064.c @@ -0,0 +1,38 @@ +/* { dg-require-effective-target loongarch_sx_hw } */ +/* { dg-do run } */ +/* { dg-options "-march=loongarch64 -mfpu=64 -mlsx -O3" } */ + +typedef __INT32_TYPE__ int32_t; +typedef unsigned __INT32_TYPE__ uint32_t; + +__attribute__ ((noipa)) static int32_t +long_filter_ehigh_3830_1 (int32_t *buffer, int length) +{ + int i, j; + int32_t dotprod = 0; + int32_t delay[4] = { 0 }; + uint32_t coeffs[4] = { 0 }; + + for (i = 0; i < length; i++) + { + dotprod = 0; + for (j = 3; j >= 0; j--) + { + dotprod += delay[j] * coeffs[j]; + coeffs[j] += ((delay[j] >> 31) | 1); + } + for (j = 3; j > 0; j--) + delay[j] = delay[j - 1]; + delay[0] = buffer[i]; + } + + return dotprod; +} + +int +main () +{ + int32_t buffer[] = { -1, 1 }; + if (long_filter_ehigh_3830_1 (buffer, 2) != -1) + __builtin_trap (); +} -- Gitee From dfc0fd2eec82d5363f0a331cd71a52a5ec682ece Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Thu, 24 Jul 2025 19:07:25 +0800 Subject: [PATCH 05/31] LoongArch: Remove the definition of CASE_VECTOR_SHORTEN_MODE. On LoongArch, the switch jump-table always stores absolute addresses, so there is no need to define the macro CASE_VECTOR_SHORTEN_MODE. gcc/ChangeLog: * config/loongarch/loongarch.h (CASE_VECTOR_SHORTEN_MODE): Delete. --- gcc/config/loongarch/loongarch.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index 10b5a0b0f32..62307b94b53 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -824,8 +824,6 @@ typedef struct { #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode - /* Define this as 1 if `char' should by default be signed; else as 0. */ #ifndef DEFAULT_SIGNED_CHAR #define DEFAULT_SIGNED_CHAR 1 -- Gitee From 8ce0be51b32c791248c2f16156562a1c4ffd34ba Mon Sep 17 00:00:00 2001 From: mengqinggang Date: Fri, 8 Aug 2025 16:22:59 +0800 Subject: [PATCH 06/31] LoongArch: macro instead enum for base abi type enum can't be used in #if. For #if expression, identifiers that are not macros, which are all considered to be the number zero. This patch may fix https://sourceware.org/bugzilla/show_bug.cgi?id=32776. gcc/ChangeLog: * config/loongarch/loongarch-def.h (ABI_BASE_LP64D): New macro. (ABI_BASE_LP64F): New macro. (ABI_BASE_LP64S): New macro. (N_ABI_BASE_TYPES): New macro. --- gcc/config/loongarch/loongarch-def.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h index 10b5f9ddc01..06eb107d391 100644 --- a/gcc/config/loongarch/loongarch-def.h +++ b/gcc/config/loongarch/loongarch-def.h @@ -78,12 +78,10 @@ extern loongarch_def_array /* Base ABI */ -enum { - ABI_BASE_LP64D = 0, - ABI_BASE_LP64F = 1, - ABI_BASE_LP64S = 2, - N_ABI_BASE_TYPES = 3 -}; +#define ABI_BASE_LP64D 0 +#define ABI_BASE_LP64F 1 +#define ABI_BASE_LP64S 2 +#define N_ABI_BASE_TYPES 3 extern loongarch_def_array loongarch_abi_base_strings; -- Gitee From 0ef1c9545d83b7c7985e42cd611fe3de2761d86b Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Wed, 18 Mar 2026 14:38:42 +0800 Subject: [PATCH 07/31] LoongArch: Define hook TARGET_COMPUTE_PRESSURE_CLASSES[PR120476]. The rtx cost value defined by the target backend affects the calculation of register pressure classes in the IRA, thus affecting scheduling. This may cause program performance degradation. For example, OpenSSL 3.5.1 SHA512 and SPEC CPU 2017 exchange_r. This problem can be avoided by defining a set of register pressure classes in the target backend instead of using the default IRA to automatically calculate them. --- gcc/config/loongarch/loongarch.cc | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 883db986aae..8f7ad207efc 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -11147,6 +11147,18 @@ loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_i return default_noce_conversion_profitable_p (seq, if_info); } +/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */ + +static int +loongarch_compute_pressure_classes (reg_class *classes) +{ + int i = 0; + classes[i++] = GENERAL_REGS; + classes[i++] = FP_REGS; + classes[i++] = FCC_REGS; + return i; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -11421,6 +11433,9 @@ loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_i #undef TARGET_NOCE_CONVERSION_PROFITABLE_P #define TARGET_NOCE_CONVERSION_PROFITABLE_P loongarch_noce_conversion_profitable_p +#undef TARGET_COMPUTE_PRESSURE_CLASSES +#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-loongarch.h" -- Gitee From 5de5d2fb3aedf487764588c1de60fde61a91a47e Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Thu, 14 Aug 2025 11:59:53 +0800 Subject: [PATCH 08/31] LoongArch: Fix ICE caused by function add_stmt_cost[PR121542]. PR target/121542 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_vector_costs::add_stmt_cost): When using vectype, first determine whether it is NULL. gcc/testsuite/ChangeLog: * gcc.target/loongarch/pr121542.c: New test. --- gcc/config/loongarch/loongarch.cc | 1 + gcc/testsuite/gcc.target/loongarch/pr121542.c | 54 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121542.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 8f7ad207efc..929fe72e5c9 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4380,6 +4380,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } else if (TARGET_RECIP_VEC_DIV + && vectype && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) { machine_mode mode = TYPE_MODE (vectype); diff --git a/gcc/testsuite/gcc.target/loongarch/pr121542.c b/gcc/testsuite/gcc.target/loongarch/pr121542.c new file mode 100644 index 00000000000..51a5e3c4480 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr121542.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-mrecip=all -mfrecipe -mabi=lp64d -march=loongarch64 -mfpu=64 -msimd=lasx -Ofast" } */ + +typedef long unsigned int STRLEN; +typedef struct sv SV; +struct sv +{ + void *sv_any; + unsigned int sv_refcnt; + unsigned int sv_flags; +}; +typedef struct xpv XPV; +struct xpv +{ + char *xpv_pv; + STRLEN xpv_cur; + STRLEN xpv_len; +}; +typedef unsigned long UV; +extern char *PL_bufend; +extern char *d; +SV *Perl_newSV (STRLEN len); + +char * +S_scan_const (char *start) +{ + register char *send = PL_bufend; + SV *sv = Perl_newSV (send - start); + register char *s = start; + UV uv; + + while (s < send) + { + if (!(((UV)(uv)) < 0x80)) + { + int hicount = 0; + unsigned char *c; + for (c = (unsigned char *)((XPV *)(sv)->sv_any)->xpv_pv; + c < (unsigned char *)d; c++) + { + if (!(((UV)(*c)) < 0x80)) + { + hicount++; + } + } + d += hicount; + *d++ = (char)uv; + } + + s++; + } + + return s; +} -- Gitee From f0ba5dcedebdf83eb9e0f74dc14211e3b1b55951 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:41 +0800 Subject: [PATCH 09/31] LoongArch: (NFC) Remove atomic_optab and use amop instead They are the same. gcc/ChangeLog: * config/loongarch/sync.md (atomic_optab): Remove. (atomic_): Change atomic_optab to amop. (atomic_fetch_): Likewise. --- gcc/config/loongarch/sync.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 2e008c4874e..92962ba6429 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -35,8 +35,6 @@ ]) (define_code_iterator any_atomic [plus ior xor and]) -(define_code_attr atomic_optab - [(plus "add") (ior "or") (xor "xor") (and "and")]) ;; This attribute gives the format suffix for atomic memory operations. (define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) @@ -175,7 +173,7 @@ } [(set (attr "length") (const_int 12))]) -(define_insn "atomic_" +(define_insn "atomic_" [(set (match_operand:GPR 0 "memory_operand" "+ZB") (unspec_volatile:GPR [(any_atomic:GPR (match_dup 0) @@ -197,7 +195,7 @@ "amadd%A2.\t$zero,%z1,%0" [(set (attr "length") (const_int 4))]) -(define_insn "atomic_fetch_" +(define_insn "atomic_fetch_" [(set (match_operand:GPR 0 "register_operand" "=&r") (match_operand:GPR 1 "memory_operand" "+ZB")) (set (match_dup 1) -- Gitee From 2a071039c38de737cdfe0d3c13dafe1e4accfa79 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:42 +0800 Subject: [PATCH 10/31] LoongArch: (NFC) Remove amo and use size instead They are the same. gcc/ChangeLog: * config/loongarch/sync.md: Use instead of . (amo): Remove. --- gcc/config/loongarch/sync.md | 53 +++++++++++++++++------------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 92962ba6429..5fdb149ee46 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -36,9 +36,6 @@ (define_code_iterator any_atomic [plus ior xor and]) -;; This attribute gives the format suffix for atomic memory operations. -(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) - ;; expands to the name of the atomic operand that implements a ;; particular code. (define_code_attr amop [(ior "or") (xor "xor") (and "and") (plus "add")]) @@ -181,7 +178,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "" - "am%A2.\t$zero,%z1,%0" + "am%A2.\t$zero,%z1,%0" [(set (attr "length") (const_int 4))]) (define_insn "atomic_add" @@ -192,7 +189,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "ISA_HAS_LAM_BH" - "amadd%A2.\t$zero,%z1,%0" + "amadd%A2.\t$zero,%z1,%0" [(set (attr "length") (const_int 4))]) (define_insn "atomic_fetch_" @@ -205,7 +202,7 @@ (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "" - "am%A3.\t%0,%z2,%1" + "am%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_insn "atomic_exchange" @@ -217,7 +214,7 @@ (set (match_dup 1) (match_operand:GPR 2 "register_operand" "r"))] "" - "amswap%A3.\t%0,%z2,%1" + "amswap%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_insn "atomic_exchange_short" @@ -229,7 +226,7 @@ (set (match_dup 1) (match_operand:SHORT 2 "register_operand" "r"))] "ISA_HAS_LAM_BH" - "amswap%A3.\t%0,%z2,%1" + "amswap%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_insn "atomic_cas_value_strong" @@ -244,7 +241,7 @@ "" { output_asm_insn ("1:", operands); - output_asm_insn ("ll.\t%0,%1", operands); + output_asm_insn ("ll.\t%0,%1", operands); /* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the return value of the val_without_const_folding will not be truncated and @@ -264,7 +261,7 @@ output_asm_insn ("bne\t%0,%z2,2f", operands); output_asm_insn ("or%i3\t%5,$zero,%3", operands); - output_asm_insn ("sc.\t%5,%1", operands); + output_asm_insn ("sc.\t%5,%1", operands); output_asm_insn ("beqz\t%5,1b", operands); output_asm_insn ("b\t3f", operands); output_asm_insn ("2:", operands); @@ -289,7 +286,7 @@ (match_operand:SI 4 "const_int_operand")] ;; mod_s UNSPEC_COMPARE_AND_SWAP))] "ISA_HAS_LAMCAS" - "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" + "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" [(set (attr "length") (const_int 8))]) (define_expand "atomic_compare_and_swap" @@ -404,12 +401,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%2\\n\\t" "bne\\t%7,%z4,2f\\n\\t" "and\\t%7,%0,%z3\\n\\t" "or%i5\\t%7,%7,%5\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b\\n\\t" "b\\t3f\\n\\t" "2:\\n\\t" @@ -494,12 +491,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "add.w\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } @@ -520,12 +517,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "sub.w\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } [(set (attr "length") (const_int 28))]) @@ -545,12 +542,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "and\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } [(set (attr "length") (const_int 28))]) @@ -570,12 +567,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "xor\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } @@ -596,12 +593,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "or\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } @@ -622,12 +619,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "and\\t%8,%0,%z5\\n\\t" "xor\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } [(set (attr "length") (const_int 28))]) @@ -646,10 +643,10 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%z3\\n\\t" "or%i5\\t%7,%7,%5\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beqz\\t%7,1b\\n\\t"; } [(set (attr "length") (const_int 20))]) @@ -686,7 +683,7 @@ (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "ISA_HAS_LAM_BH" - "amadd%A3.\t%0,%z2,%1" + "amadd%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_expand "atomic_fetch_add" -- Gitee From 1ac0e9b8e0f6123a82ec282f86955b954d9ef72c Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:43 +0800 Subject: [PATCH 11/31] LoongArch: Don't use "+" for atomic_{load, store} "m" constraint Atomic load does not modify the memory. Atomic store does not read the memory, thus we can use "=" instead. gcc/ChangeLog: * config/loongarch/sync.md (atomic_load): Remove "+" for the memory operand. (atomic_store): Use "=" instead of "+" for the memory operand. --- gcc/config/loongarch/sync.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 5fdb149ee46..ef1b9118d64 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -102,7 +102,7 @@ (define_insn "atomic_load" [(set (match_operand:QHWD 0 "register_operand" "=r") (unspec_volatile:QHWD - [(match_operand:QHWD 1 "memory_operand" "+m") + [(match_operand:QHWD 1 "memory_operand" "m") (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_ATOMIC_LOAD))] "" @@ -139,7 +139,7 @@ ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. (define_insn "atomic_store" - [(set (match_operand:QHWD 0 "memory_operand" "+m") + [(set (match_operand:QHWD 0 "memory_operand" "=m") (unspec_volatile:QHWD [(match_operand:QHWD 1 "reg_or_0_operand" "rJ") (match_operand:SI 2 "const_int_operand")] ;; model -- Gitee From 800a3913d2b0c75e498aa1fd2a277695cdd90bd6 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:44 +0800 Subject: [PATCH 12/31] LoongArch: Allow using bstrins for masking the address in atomic_test_and_set We can use bstrins for masking the address here. As people are already working on LA32R (which lacks bstrins instructions), for future-proofing we check whether (const_int -4) is an and_operand and force it into an register if not. gcc/ChangeLog: * config/loongarch/sync.md (atomic_test_and_set): Use bstrins for masking the address if possible. --- gcc/config/loongarch/sync.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index ef1b9118d64..affab1f6fcf 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -356,12 +356,13 @@ rtx mem = operands[1]; rtx model = operands[2]; rtx addr = force_reg (Pmode, XEXP (mem, 0)); - rtx tmp_reg = gen_reg_rtx (Pmode); - rtx zero_reg = gen_rtx_REG (Pmode, 0); - + rtx mask = gen_int_mode (-4, Pmode); rtx aligned_addr = gen_reg_rtx (Pmode); - emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4))); - emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg)); + + if (!and_operand (mask, Pmode)) + mask = force_reg (Pmode, mask); + + emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask)); rtx aligned_mem = change_address (mem, SImode, aligned_addr); set_mem_alias_set (aligned_mem, 0); -- Gitee From 78e8063efb4764085887598c9ee56043c1bde7c0 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:45 +0800 Subject: [PATCH 13/31] LoongArch: Don't emit overly-restrictive barrier for LL-SC loops For LL-SC loops, if the atomic operation has succeeded, the SC instruction always imply a full barrier, so the barrier we manually inserted only needs to take the account for the failure memorder, not the success memorder (the barrier is skipped with "b 3f" on success anyway). Note that if we use the AMCAS instructions, we indeed need to consider both the success memorder an the failure memorder deciding if "_db" suffix is needed. Thus the semantics of atomic_cas_value_strong and atomic_cas_value_strong_amcas start to be different. To prevent the compiler from being too clever, use a different unspec code for AMCAS instructions. gcc/ChangeLog: * config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AMCAS): New UNSPEC code. (atomic_cas_value_strong): NFC, update the comment to note we only need to consider failure memory order. (atomic_cas_value_strong_amcas): Use UNSPEC_COMPARE_AND_SWAP_AMCAS instead of UNSPEC_COMPARE_AND_SWAP. (atomic_compare_and_swap): Pass failure memorder to gen_atomic_cas_value_strong. (atomic_compare_and_swap): Pass failure memorder to gen_atomic_cas_value_cmp_and_7_si. --- gcc/config/loongarch/sync.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index affab1f6fcf..f19ad6af4d3 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -21,6 +21,7 @@ (define_c_enum "unspec" [ UNSPEC_COMPARE_AND_SWAP + UNSPEC_COMPARE_AND_SWAP_AMCAS UNSPEC_COMPARE_AND_SWAP_ADD UNSPEC_COMPARE_AND_SWAP_SUB UNSPEC_COMPARE_AND_SWAP_AND @@ -235,7 +236,7 @@ (set (match_dup 1) (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") (match_operand:GPR 3 "reg_or_0_operand" "rJ") - (match_operand:SI 4 "const_int_operand")] ;; mod_s + (match_operand:SI 4 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP)) (clobber (match_scratch:GPR 5 "=&r"))] "" @@ -283,8 +284,8 @@ (set (match_dup 1) (unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ") (match_operand:QHWD 3 "reg_or_0_operand" "rJ") - (match_operand:SI 4 "const_int_operand")] ;; mod_s - UNSPEC_COMPARE_AND_SWAP))] + (match_operand:SI 4 "const_int_operand")] ;; mod + UNSPEC_COMPARE_AND_SWAP_AMCAS))] "ISA_HAS_LAMCAS" "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" [(set (attr "length") (const_int 8))]) @@ -313,16 +314,14 @@ && is_mm_release (memmodel_base (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); - operands[6] = mod_s; - if (ISA_HAS_LAMCAS) emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_s)); else emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_f)); rtx compare = operands[1]; if (operands[3] != const0_rtx) @@ -396,7 +395,7 @@ (match_operand:GPR 3 "reg_or_0_operand" "rJ") (match_operand:GPR 4 "reg_or_0_operand" "rJ") (match_operand:GPR 5 "reg_or_0_operand" "rJ") - (match_operand:SI 6 "const_int_operand")] ;; model + (match_operand:SI 6 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP)) (clobber (match_scratch:GPR 7 "=&r"))] "" @@ -440,18 +439,16 @@ && is_mm_release (memmodel_base (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); - operands[6] = mod_s; - if (ISA_HAS_LAMCAS) emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_s)); else { union loongarch_gen_fn_ptrs generator; generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; loongarch_expand_atomic_qihi (generator, operands[1], operands[2], - operands[3], operands[4], operands[6]); + operands[3], operands[4], mod_f); } rtx compare = operands[1]; -- Gitee From e0c009c7f2155ad1a316c3162aace5c369e6d4c0 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:46 +0800 Subject: [PATCH 14/31] LoongArch: Remove unneeded "b 3f" instruction after LL-SC loops This instruction is used to skip an redundant barrier if -mno-ld-seq-sa or the memory model requires a barrier on failure. But with -mld-seq-sa and other memory models the barrier may be nonexisting at all, and we should remove the "b 3f" instruction as well. The implementation uses a new operand modifier "%T" to output a comment marker if the operand is a memory order for which the barrier won't be generated. "%T", and also "%t", are not really used before and the code for them in loongarch_print_operand_reloc is just some MIPS legacy. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_print_operand_reloc): Make "%T" output a comment marker if the operand is a memory order for which the barrier won't be generated; remove "%t". * config/loongarch/sync.md (atomic_cas_value_strong): Add %T before "b 3f". (atomic_cas_value_cmp_and_7_): Likewise. --- gcc/config/loongarch/loongarch.cc | 19 ++++++++----------- gcc/config/loongarch/sync.md | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 929fe72e5c9..4690c2a2d96 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -6234,9 +6234,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, 'Q' Print R_LARCH_RELAX for TLS IE. 'r' Print address 12-31bit relocation associated with OP. 'R' Print address 32-51bit relocation associated with OP. - 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), - 'z' for (eq:?I ...), 'n' for (ne:?I ...). - 't' Like 'T', but with the EQ/NE cases reversed + 'T' Print a comment marker if %G outputs nothing. 'u' Print a LASX register. 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. @@ -6315,6 +6313,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter) fputs ("dbar\t0x700", file); break; + case 'T': + if (!loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op))) + && ISA_HAS_LD_SEQ_SA) + fprintf (file, "%s", ASM_COMMENT_START); + break; + case 'h': if (code == HIGH) op = XEXP (op, 0); @@ -6393,14 +6398,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter) false /* lo_reloc */); break; - case 't': - case 'T': - { - int truth = (code == NE) == (letter == 'T'); - fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file); - } - break; - case 'V': if (CONST_VECTOR_P (op)) { diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index f19ad6af4d3..89a02eebf85 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -264,7 +264,7 @@ output_asm_insn ("or%i3\t%5,$zero,%3", operands); output_asm_insn ("sc.\t%5,%1", operands); output_asm_insn ("beqz\t%5,1b", operands); - output_asm_insn ("b\t3f", operands); + output_asm_insn ("%T4b\t3f", operands); output_asm_insn ("2:", operands); output_asm_insn ("%G4", operands); output_asm_insn ("3:", operands); @@ -408,7 +408,7 @@ "or%i5\\t%7,%7,%5\\n\\t" "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b\\n\\t" - "b\\t3f\\n\\t" + "%T6b\\t3f\\n\\t" "2:\\n\\t" "%G6\\n\\t" "3:\\n\\t"; -- Gitee From 528d525e607367e924a6f9d04606e162136d2c93 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:47 +0800 Subject: [PATCH 15/31] LoongArch: Remove unneeded "andi offset, addr, 3" instruction in atomic_test_and_set On LoongArch sll.w and srl.w instructions only take the [4:0] bits of rk (shift amount) into account, and we've already defined SHIFT_COUNT_TRUNCATED to 1 so the compiler knows this fact, thus we don't need this instruction. gcc/ChangeLog: * config/loongarch/sync.md (atomic_test_and_set): Remove unneeded andi instruction from the expansion. --- gcc/config/loongarch/sync.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 89a02eebf85..0c1ece987e2 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -366,15 +366,14 @@ rtx aligned_mem = change_address (mem, SImode, aligned_addr); set_mem_alias_set (aligned_mem, 0); - rtx offset = gen_reg_rtx (SImode); - emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), - GEN_INT (3))); - rtx tmp = gen_reg_rtx (SImode); emit_move_insn (tmp, GEN_INT (1)); + /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need + to mask addr with 0b11 here. */ rtx shmt = gen_reg_rtx (SImode); - emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3))); + emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr), + GEN_INT (3))); rtx word = gen_reg_rtx (SImode); emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt)); -- Gitee From a0c1f4c3092590980b8ec33fa266be90f76dfa42 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:48 +0800 Subject: [PATCH 16/31] LoongArch: Implement subword atomic_fetch_{and, or, xor} with am*.w instructions We can just shift the mask and fill the other bits with 0 (for ior/xor) or 1 (for and), and use an am*.w instruction to perform the atomic operation, instead of using a LL-SC loop. gcc/ChangeLog: * config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AND): Remove. (UNSPEC_COMPARE_AND_SWAP_XOR): Remove. (UNSPEC_COMPARE_AND_SWAP_OR): Remove. (atomic_test_and_set): Rename to ... (atomic_fetch_): ... this, and adapt the expansion to use it for any bitwise operations and any val, instead of just ior 1. (atomic_test_and_set): New define_expand. --- gcc/config/loongarch/sync.md | 177 +++++++---------------------------- 1 file changed, 34 insertions(+), 143 deletions(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 0c1ece987e2..14bf480f011 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -24,9 +24,6 @@ UNSPEC_COMPARE_AND_SWAP_AMCAS UNSPEC_COMPARE_AND_SWAP_ADD UNSPEC_COMPARE_AND_SWAP_SUB - UNSPEC_COMPARE_AND_SWAP_AND - UNSPEC_COMPARE_AND_SWAP_XOR - UNSPEC_COMPARE_AND_SWAP_OR UNSPEC_COMPARE_AND_SWAP_NAND UNSPEC_SYNC_OLD_OP UNSPEC_SYNC_EXCHANGE @@ -343,17 +340,18 @@ DONE; }) -(define_expand "atomic_test_and_set" - [(match_operand:QI 0 "register_operand" "") ;; bool output - (match_operand:QI 1 "memory_operand" "+ZB") ;; memory - (match_operand:SI 2 "const_int_operand" "")] ;; model +(define_expand "atomic_fetch_" + [(match_operand:SHORT 0 "register_operand" "") ;; output + (any_bitwise (match_operand:SHORT 1 "memory_operand" "+ZB") ;; memory + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val + (match_operand:SI 3 "const_int_operand" "")] ;; model "" { - /* We have no QImode atomics, so use the address LSBs to form a mask, - then use an aligned SImode atomic. */ + /* We have no QI/HImode bitwise atomics, so use the address LSBs to form + a mask, then use an aligned SImode atomic. */ rtx result = operands[0]; rtx mem = operands[1]; - rtx model = operands[2]; + rtx model = operands[3]; rtx addr = force_reg (Pmode, XEXP (mem, 0)); rtx mask = gen_int_mode (-4, Pmode); rtx aligned_addr = gen_reg_rtx (Pmode); @@ -367,7 +365,8 @@ set_mem_alias_set (aligned_mem, 0); rtx tmp = gen_reg_rtx (SImode); - emit_move_insn (tmp, GEN_INT (1)); + emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode, + operands[2], mode)); /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need to mask addr with 0b11 here. */ @@ -378,14 +377,37 @@ rtx word = gen_reg_rtx (SImode); emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt)); + if () + { + /* word = word | ~(mode_mask << shmt) */ + rtx tmp = force_reg (SImode, + gen_int_mode (GET_MODE_MASK (mode), + SImode)); + emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt)); + emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp), + word)); + } + tmp = gen_reg_rtx (SImode); - emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model)); + emit_insn (gen_atomic_fetch_si (tmp, aligned_mem, word, model)); emit_move_insn (gen_lowpart (SImode, result), gen_rtx_LSHIFTRT (SImode, tmp, shmt)); DONE; }) +(define_expand "atomic_test_and_set" + [(match_operand:QI 0 "register_operand" "") ;; bool output + (match_operand:QI 1 "memory_operand" "+ZB") ;; memory + (match_operand:SI 2 "const_int_operand" "")] ;; model + "" +{ + rtx one = force_reg (QImode, gen_int_mode (1, QImode)); + emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one, + operands[2])); + DONE; +}) + (define_insn "atomic_cas_value_cmp_and_7_" [(set (match_operand:GPR 0 "register_operand" "=&r") (match_operand:GPR 1 "memory_operand" "+ZC")) @@ -524,83 +546,6 @@ } [(set (attr "length") (const_int 28))]) -(define_insn "atomic_cas_value_and_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) - (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_AND)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" -{ - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "and\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; -} - [(set (attr "length") (const_int 28))]) - -(define_insn "atomic_cas_value_xor_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) - (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_XOR)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" -{ - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "xor\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; -} - - [(set (attr "length") (const_int 28))]) - -(define_insn "atomic_cas_value_or_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) - (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_OR)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" -{ - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "or\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; -} - - [(set (attr "length") (const_int 28))]) - (define_insn "atomic_cas_value_nand_7_" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res (match_operand:GPR 1 "memory_operand" "+ZC")) @@ -725,60 +670,6 @@ DONE; }) -(define_expand "atomic_fetch_and" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(and:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_and_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - -(define_expand "atomic_fetch_xor" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(xor:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_xor_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - -(define_expand "atomic_fetch_or" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(ior:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_or_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - (define_expand "atomic_fetch_nand" [(set (match_operand:SHORT 0 "register_operand" "=&r") (match_operand:SHORT 1 "memory_operand" "+ZB")) -- Gitee From 023494d35fd6565d3aa26bf41fb751f1a1012f27 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 1 Mar 2025 11:46:49 +0800 Subject: [PATCH 17/31] LoongArch: Don't expand atomic_fetch_sub_{hi, qi} to LL-SC loop if -mlam-bh With -mlam-bh, we should negate the addend first, and use an amadd instruction. Disabling the expander makes the compiler do it correctly. gcc/ChangeLog: * config/loongarch/sync.md (atomic_fetch_sub): Disable if ISA_HAS_LAM_BH. --- gcc/config/loongarch/sync.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 14bf480f011..ad43c58d016 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -661,7 +661,7 @@ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] - "" + "!ISA_HAS_LAM_BH" { union loongarch_gen_fn_ptrs generator; generator.fn_7 = gen_atomic_cas_value_sub_7_si; -- Gitee From 42e47e3263dbb1508ea5e80216d20c25f2738874 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Fri, 12 Sep 2025 15:57:08 +0800 Subject: [PATCH 18/31] LoongArch: Fix wrong code from bstrpick split After late-combine is added, split1 can see an input like (insn 56 55 169 5 (set (reg/v:DI 87 [ n ]) (ior:DI (and:DI (reg/v:DI 87 [ n ]) (const_int 281474976710655 [0xffffffffffff])) (and:DI (reg:DI 131 [ _45 ]) (const_int -281474976710656 [0xffff000000000000])))) "pr121906.c":22:8 108 {*bstrins_di_for_ior_mask} (nil)) And the splitter ends up emitting (insn 184 55 185 5 (set (reg/v:DI 87 [ n ]) (reg:DI 131 [ _45 ])) "pr121906.c":22:8 -1 (nil)) (insn 185 184 169 5 (set (zero_extract:DI (reg/v:DI 87 [ n ]) (const_int 48 [0x30]) (const_int 0 [0])) (reg/v:DI 87 [ n ])) "pr121906.c":22:8 -1 (nil)) which obviously lost everything in r87, instead of retaining its lower bits as we expect. It's because the splitter didn't anticipate the output register may be one of the input registers. PR target/121906 gcc/ * config/loongarch/loongarch.md (*bstrins__for_ior_mask): Always create a new pseudo for the input register of the bstrins instruction. gcc/testsuite/ * gcc.target/loongarch/pr121906.c: New test. --- gcc/config/loongarch/loongarch.md | 14 ++++----- gcc/testsuite/gcc.target/loongarch/pr121906.c | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121906.c diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index f5181093a53..155390fb2d7 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1630,13 +1630,13 @@ operands[2] = GEN_INT (len); operands[4] = GEN_INT (lo); - if (lo) - { - rtx tmp = gen_reg_rtx (mode); - emit_move_insn (tmp, gen_rtx_ASHIFTRT(mode, operands[3], - GEN_INT (lo))); - operands[3] = tmp; - } + /* Use a new pseudo register even if lo == 0 or we'll wreck havoc + when operands[0] is same as operands[3]. See PR 121906. */ + rtx tmp = gen_reg_rtx (mode); + rtx val = lo ? gen_rtx_ASHIFTRT (mode, operands[3], GEN_INT (lo)) + : operands[3]; + emit_move_insn (tmp, val); + operands[3] = tmp; }) ;; We always avoid the shift operation in bstrins__for_ior_mask diff --git a/gcc/testsuite/gcc.target/loongarch/pr121906.c b/gcc/testsuite/gcc.target/loongarch/pr121906.c new file mode 100644 index 00000000000..b4fde5f0c85 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr121906.c @@ -0,0 +1,31 @@ +/* PR target/121906 */ +/* { dg-do run } */ +/* { dg-options "-O2 -mno-lsx" } */ + +typedef unsigned short u16; +typedef unsigned long u64; +typedef u16 v4hi __attribute__ ((vector_size (8))); +typedef u16 v8hi __attribute__ ((vector_size (16))); + +u64 d; +int e, i; +u16 x; + +int +main () +{ + v4hi n = { 1 }; + u64 *o = &d; +p: + asm goto ("" : : : : q); + n[3] = (-(v8hi){ 0, 0, 0, 0, x })[7]; + for (; e >= 0; e--) + { + *o = n[0]; + if (i) + goto p; + q: + } + if (d != 1) + __builtin_trap (); +} -- Gitee From ca8eb688a5d676c744aa6e864cdf16df9c9e00a7 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 26 Oct 2025 13:20:20 +0800 Subject: [PATCH 19/31] LoongArch: Make the code generation of the trap pattern configurable In some applications (notably the Linux kernel), "break 0" is used as a trap that a handler may be able to recover. But in GCC the "trap" pattern is meant to make the program rightfully die instead. As [1] describes, sometimes it's vital to distinguish between the two cases. The kernel developers prefer "break 1" here, but in the user-space it's better to trigger a SIGILL instead of SIGTRAP as the latter is more likely used as a application-defined trap. To support both cases, make the code generation configurable with a new option. [1]:https://lore.kernel.org/20250923061722.24457-1-yangtiezhu@loongson.cn gcc/ * config/loongarch/genopts/loongarch.opt.in (-mbreak-code=): New. * config/loongarch/loongarch.opt: Regenerate. * config/loongarch/loongarch.md (trap): Separate to a define_insn and a define_expand which takes la_break_code. * doc/invoke.texi (-mbreak-code=): Document. * config/loongarch/loongarch.opt.urls: Regenerate. gcc/testsuite * gcc.target/loongarch/trap-default.c: New test. * gcc.target/loongarch/trap-1.c: New test. --- gcc/config/loongarch/genopts/loongarch.opt.in | 4 ++++ gcc/config/loongarch/loongarch.md | 14 +++++++++++--- gcc/config/loongarch/loongarch.opt | 4 ++++ gcc/config/loongarch/loongarch.opt.urls | 3 +++ gcc/testsuite/gcc.target/loongarch/trap-1.c | 9 +++++++++ gcc/testsuite/gcc.target/loongarch/trap-default.c | 9 +++++++++ 6 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/trap-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/trap-default.c diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in index 4ffd969e350..9739279a549 100644 --- a/gcc/config/loongarch/genopts/loongarch.opt.in +++ b/gcc/config/loongarch/genopts/loongarch.opt.in @@ -205,6 +205,10 @@ mmax-inline-memcpy-size= Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. +mbreak-code= +Target Joined UInteger Var(la_break_code) Init(-1) Save +-mbreak-code=CODE Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range. + Enum Name(explicit_relocs) Type(int) The code model option names for -mexplicit-relocs: diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 155390fb2d7..fd59ec9225e 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -689,14 +689,22 @@ ;; .................... ;; -(define_insn "trap" - [(trap_if (const_int 1) (const_int 0))] +(define_insn "*trap" + [(trap_if (const_int 1) (match_operand 0 "const_int_operand"))] "" { - return "break\t0"; + return (const_uimm15_operand (operands[0], VOIDmode) + ? "break\t%0" + : "amswap.w\t$r0,$r1,$r0"); } [(set_attr "type" "trap")]) +(define_expand "trap" + [(trap_if (const_int 1) (match_dup 0))] + "" +{ + operands[0] = GEN_INT (la_break_code); +}) ;; diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index 52469757fc6..55af42c9e77 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -213,6 +213,10 @@ mmax-inline-memcpy-size= Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save -mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024. +mbreak-code= +Target Joined UInteger Var(la_break_code) Init(-1) Save +-mbreak-code=CODE Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range. + Enum Name(explicit_relocs) Type(int) The code model option names for -mexplicit-relocs: diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls index 5f644f6c315..7f02d1174ff 100644 --- a/gcc/config/loongarch/loongarch.opt.urls +++ b/gcc/config/loongarch/loongarch.opt.urls @@ -48,6 +48,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1) mmax-inline-memcpy-size= UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size) +mbreak-code= +UrlSuffix(gcc/LoongArch-Options.html#index-mbreak-code) + mexplicit-relocs= UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1) diff --git a/gcc/testsuite/gcc.target/loongarch/trap-1.c b/gcc/testsuite/gcc.target/loongarch/trap-1.c new file mode 100644 index 00000000000..8936f60cce2 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/trap-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -w -fisolate-erroneous-paths-dereference -mbreak-code=1" } */ +/* { dg-final { scan-assembler "break\\t1" } } */ + +int +bug (void) +{ + return *(int *)0; +} diff --git a/gcc/testsuite/gcc.target/loongarch/trap-default.c b/gcc/testsuite/gcc.target/loongarch/trap-default.c new file mode 100644 index 00000000000..32948d4c822 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/trap-default.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -w -fisolate-erroneous-paths-dereference" } */ +/* { dg-final { scan-assembler "amswap\\.w\\t\\\$r0,\\\$r1,\\\$r0" } } */ + +int +bug (void) +{ + return *(int *)0; +} -- Gitee From 7d443f4ce1e459b1ef72079dbda045353c527b0b Mon Sep 17 00:00:00 2001 From: Guo Jie Date: Sat, 1 Nov 2025 15:33:06 +0800 Subject: [PATCH 20/31] LoongArch: Correct the cost of mulh.{w[u]/d[u]} gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_rtx_costs): Correct the cost of mulh.{w[u]|d[u]}. gcc/testsuite/ChangeLog: * gcc.target/loongarch/mulh_wu.c: New test. --- gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++- gcc/testsuite/gcc.target/loongarch/mulh_wu.c | 10 ++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/mulh_wu.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 4690c2a2d96..aae2e4138ba 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -3901,9 +3901,34 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, speed); return true; + case LSHIFTRT: + /* Correct the cost of mulh.{w[u]/d[u]}. */ + if (outer_code == TRUNCATE && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == (GET_MODE_BITSIZE (mode) / 2) + && GET_CODE (XEXP (x, 0)) == MULT + && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)) + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == REG + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == REG) + { + if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SImode + && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == SImode) + { + *total = loongarch_cost->int_mult_si; + return true; + } + if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode + && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == DImode) + { + *total = loongarch_cost->int_mult_di; + return true; + } + } + /* Fall through. */ case ASHIFT: case ASHIFTRT: - case LSHIFTRT: case ROTATE: case ROTATERT: if (CONSTANT_P (XEXP (x, 1))) diff --git a/gcc/testsuite/gcc.target/loongarch/mulh_wu.c b/gcc/testsuite/gcc.target/loongarch/mulh_wu.c new file mode 100644 index 00000000000..53fc518313c --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/mulh_wu.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { loongarch64*-*-* } } } */ +/* { dg-options "-O3 -mabi=lp64d" } */ +/* { dg-final { scan-assembler "\tmulh.wu" } } */ +/* { dg-final { scan-assembler-not "\tlu32i.d" } } */ + +unsigned int +test (unsigned int *a) +{ + return *a / 60; +} -- Gitee From ab8ec5fd02d5dca8b236284b928d95cda257bde8 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 4 Nov 2025 21:03:18 +0800 Subject: [PATCH 21/31] LoongArch: Switch the default code model to medium MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has turned out the normal code model isn't enough for some large LoongArch link units in practice. Quoting WANG Rui's comment [1]: We’ve actually been considering pushing for a change to the default code model for LoongArch compilers (including GCC) for a while now. In fact, this was one of the topics discussed in yesterday’s internal compiler tool-chain meeting. The reason we haven’t moved forward with it yet is that the medium code model generates a R_LARCH_CALL36 relocation, which had some issues with earlier versions of the linker. We need to assess the impact on users before proceeding with the change. In GCC we have build-time probe for linker call36 support and if the linker does not support it, we fall back to pcalau12i + jirl or la.{local,global} + jirl for the medium code model. I also had some concern about a potential performance regression caused by the conservative nature of the relaxation process, but when I tested this patch it turned out the relaxation is powerful enough to eliminate all the pcaddu18i instructions in cc1plus and libstdc++.so. The Loong Arch Linux project has been using -mcmodel=medium in their {C,CXX}FLAGS building packages for a while [2] and they've not reported any issues with that. The Linux kernel developers has already anticipated the change and explicitly specified -mcmodel=normal for a while [3]. Thus to me it's safe to make GCC 16 the first release with the medium code model as the default now. If someone must keep the normal code model as the default for any reason, it's possible to configure GCC using --with-cmodel=normal. [1]: https://discourse.llvm.org/t/rfc-changing-the-default-code-model-for-loongarch/85317/3 [2]: https://github.com/lcpu-club/loongarch-packages/pull/340 [3]: https://git.kernel.org/torvalds/c/e67e0eb6a98b gcc/ * config.gcc: Support --with-cmodel={medium,normal} and make medium the default for LoongArch, define TARGET_DEFAULT_CMODEL as the selected value. * config/loongarch/loongarch-opts.cc: Use TARGET_DEFAULT_CMODEL instead of hard coding CMODEL_NORMAL. * doc/install.texi: Document that --with-cmodel= is supported for LoongArch. * doc/invoke.texi: Update the document about default code model on LoongArch. gcc/testsuite/ * gcc.target/loongarch/vect-frint-no-inexact.c (dg-options): Add -mcmodel=normal. * gcc.target/loongarch/vect-frint-scalar-no-inexact.c: Likewise. * gcc.target/loongarch/vect-frint-scalar.c: Likewise. * gcc.target/loongarch/vect-frint.c: Likewise. * gcc.target/loongarch/vect-ftint-no-inexact.c: Likewise. * gcc.target/loongarch/vect-ftint.c: Likewise. --- gcc/config.gcc | 18 +++++++++++++++++- gcc/config/loongarch/loongarch-opts.cc | 2 +- .../loongarch/vect-frint-no-inexact.c | 2 +- .../loongarch/vect-frint-scalar-no-inexact.c | 2 +- .../gcc.target/loongarch/vect-frint-scalar.c | 2 +- .../gcc.target/loongarch/vect-frint.c | 2 +- .../loongarch/vect-ftint-no-inexact.c | 2 +- .../gcc.target/loongarch/vect-ftint.c | 2 +- 8 files changed, 24 insertions(+), 8 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index 0d5eafa93ea..31f84b37d16 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4982,7 +4982,7 @@ case "${target}" in ;; loongarch*-*) - supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls" + supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls cmodel" # Local variables unset \ @@ -5403,6 +5403,22 @@ case "${target}" in # Remove the excessive appending comma. loongarch_multilib_list_c=${loongarch_multilib_list_c%,} loongarch_multilib_list_make=${loongarch_multilib_list_make%,} + + # Handle --with-cmodel. + # Make sure --with-cmodel is valid. If it was not specified, + # use medium as the default value. + case "${with_cmodel}" in + "" | medium) + tm_defines="${tm_defines} TARGET_DEFAULT_CMODEL=CMODEL_MEDIUM" + ;; + normal) + tm_defines="${tm_defines} TARGET_DEFAULT_CMODEL=CMODEL_NORMAL" + ;; + *) + echo "invalid option for --with-cmodel: '${with_cmodel}', available values are 'medium' and 'normal'" 1>&2 + exit 1 + ;; + esac ;; nds32*-*-*) diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc index ef877ae167a..80a5eb87a43 100644 --- a/gcc/config/loongarch/loongarch-opts.cc +++ b/gcc/config/loongarch/loongarch-opts.cc @@ -540,7 +540,7 @@ fallback: /* 5. Target code model */ - t.cmodel = constrained.cmodel ? target->cmodel : CMODEL_NORMAL; + t.cmodel = constrained.cmodel ? target->cmodel : TARGET_DEFAULT_CMODEL; switch (t.cmodel) { diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c index 7bbaf1fba5a..e20eaea205a 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */ +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx -mcmodel=normal" } */ #include "vect-frint.c" diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c index 002e3b92df7..d5f0933537d 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */ +/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact -mcmodel=normal" } */ #include "vect-frint-scalar.c" diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c index dbcb9065ad4..171ba98f00b 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mlsx -ffp-int-builtin-inexact" } */ +/* { dg-options "-O2 -mlsx -ffp-int-builtin-inexact -mcmodel=normal" } */ #define test(func, suffix) \ __typeof__ (1.##suffix) \ diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint.c b/gcc/testsuite/gcc.target/loongarch/vect-frint.c index 6bf211e7e98..bda041bdf91 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-frint.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */ +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx -mcmodel=normal" } */ float out_x[8]; double out_y[4]; diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c index 61918beef5c..3fa97531d59 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */ +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx -mcmodel=normal" } */ #include "vect-ftint.c" diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c index c4962ed1774..96da3cd7b57 100644 --- a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c +++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */ +/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx -mcmodel=normal" } */ int out_x[8]; long out_y[4]; -- Gitee From 226b976fe17306ff3b2c27a4c2a8e70e0d9ca7e1 Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Mon, 3 Nov 2025 17:53:52 +0800 Subject: [PATCH 22/31] LoongArch: Fix PR122097 (2). r16-4703 does not completely fix PR122097. Floating-point vectors were not processed in the function loongarch_const_vector_same_bytes_p. This patch will completely resolve this issue. PR target/122097 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_const_vector_same_bytes_p): Add processing for floating-point vector data. --- gcc/config/loongarch/loongarch.cc | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index aae2e4138ba..c02b5a18cc8 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1783,7 +1783,27 @@ loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode) first = CONST_VECTOR_ELT (op, 0); bytes = GET_MODE_UNIT_SIZE (mode); - val = INTVAL (first); + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx val_s = CONST_VECTOR_ELT (op, 0); + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s); + if (GET_MODE (val_s) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]; + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp; + } + } + else + val = UINTVAL (first); + first_byte = val & 0xff; for (i = 1; i < bytes; i++) { -- Gitee From e29f2ae6fddb6a8e5a95df8acace8dbdf903a97a Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Tue, 14 Oct 2025 16:20:04 +0800 Subject: [PATCH 23/31] LoongArch: Fix ICE for illegal strings in the target attribute. Modify the two situations: 1. __attribute__ ((target ("arch"))) ICE will be reported before modification, and there will be an error prompt after modification. 2. __attribute__ ((target ("arch=12345"))) Fixed the issue where the attribute string was not printed completely in the previous error message. gcc/ChangeLog: * config/loongarch/loongarch-target-attr.cc (loongarch_process_one_target_attr): Fix ICE. gcc/testsuite/ChangeLog: * gcc.target/loongarch/attr-check-error-message.c: Add tests. --- gcc/config/loongarch/loongarch-target-attr.cc | 10 ++++++++-- .../gcc.target/loongarch/attr-check-error-message.c | 8 ++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc index cb537446dff..922aa0483b5 100644 --- a/gcc/config/loongarch/loongarch-target-attr.cc +++ b/gcc/config/loongarch/loongarch-target-attr.cc @@ -203,7 +203,13 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) /* Use the option setting machinery to set an option to an enum. */ case loongarch_attr_enum: { - gcc_assert (arg); + if (!arg) + { + error_at (loc, "the value of pragma or attribute " + "% not be empty", str_to_check); + return false; + } + bool valid; int value; struct cl_decoded_option decoded; @@ -244,7 +250,7 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) were malformed we will have returned false already. */ if (!found) error_at (loc, "attribute % argument %qs is unknown", - str_to_check); + arg_str); return found; } diff --git a/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c index 82dcd172555..6420f332110 100644 --- a/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c +++ b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c @@ -28,3 +28,11 @@ test6 (void) /* { dg-error "attribute \\\'target\\\' argument not a string" } */ __attribute__ ((target ("lsx,"))) void test7 (void) /* { dg-error "malformed \\\'target\\\(\\\"lsx,\\\"\\\)\\\' pragma or attribute" } */ {} + +__attribute__ ((target ("arch"))) void +test8 (void) /* { dg-error "the value of pragma or attribute \\\'target\\\(\\\"arch\\\"\\\)\\\' not be empty" } */ +{} + +__attribute__ ((target ("lsx;priority=1"))) void +test9 (void) /* { dg-error "attribute \\\'target\\\' argument \\\'lsx;priority=1\\\' is unknown" } */ +{} -- Gitee From 01a2da7c927d541d73c3f2014a7cf10ab4ca7c88 Mon Sep 17 00:00:00 2001 From: zhaozhou Date: Mon, 10 Nov 2025 15:38:26 +0800 Subject: [PATCH 24/31] LoongArch: Fix issue where data marked as GTY is cleaned up by ggc. As for GGC(GCC Garbage Collection), it's use gengtype tool to scan all source files contain the GTY mark, and generate gt-*.h files. GGC traversal these file to find gt_root node and marks these objects that directly or indirectly reference this node as active, then clean up unmarked object's memory. For the loongarch-builtins.cc file, it is necessary to add target_gtfiles in config.gcc to generate gt-loongarch-builtins.h, and include this header file in the .cc file, prevented the data marked with GTY in this `.cc` file cleaned up by ggc. gcc/ChangeLog: * config.gcc: Add target_gtfiles. * config/loongarch/loongarch-builtins.cc: Add header file. --- gcc/config.gcc | 1 + gcc/config/loongarch/loongarch-builtins.cc | 2 ++ 2 files changed, 3 insertions(+) diff --git a/gcc/config.gcc b/gcc/config.gcc index 31f84b37d16..ec22d4b3e78 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -460,6 +460,7 @@ loongarch*-*-*) extra_headers="larchintrin.h lsxintrin.h lasxintrin.h" extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o loongarch-target-attr.o" extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o" + target_gtfiles="\$(srcdir)/config/loongarch/loongarch-builtins.cc" extra_options="${extra_options} g.opt fused-madd.opt" ;; nds32*) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 8492a5bda95..5d06a4d88c2 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -3106,3 +3106,5 @@ loongarch_build_builtin_va_list (void) { return ptr_type_node; } + +#include "gt-loongarch-builtins.h" -- Gitee From d03acd1accac2f6316b60288ecde2ccc0ca236d6 Mon Sep 17 00:00:00 2001 From: zhaozhou Date: Mon, 10 Nov 2025 15:20:26 +0800 Subject: [PATCH 25/31] LoongArch: Fix predicate for symbolic_pcrel_offset_operand. The predicate checks if the operand is PLUS(symbol_ref, const_int), but the match (match_operand 0/1) is not equal XEXP(op, 0/1). It should be adjusted to use match_test and pass XEXP(op, 0/1) into the constraint function. gcc/ChangeLog: * config/loongarch/predicates.md: Update ops. --- gcc/config/loongarch/predicates.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index 8d9f92e2263..99bed32091b 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -570,8 +570,8 @@ (define_predicate "symbolic_pcrel_offset_operand" (and (match_code "plus") - (match_operand 0 "symbolic_pcrel_operand") - (match_operand 1 "const_int_operand"))) + (match_test "symbolic_pcrel_operand (XEXP (op, 0), mode)") + (match_test "const_int_operand (XEXP (op, 1), mode)"))) (define_predicate "mem_simple_ldst_operand" (match_code "mem") -- Gitee From d498634d1f5240d6362d8b0925b6ae411c9aad50 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 16 Nov 2025 00:30:43 +0800 Subject: [PATCH 26/31] LoongArch: Fix wrong code from loongarch_expand_vec_perm_1 [PR 122695] PR target/122695 gcc/ * config/loongarch/loongarch.cc (loongarch_expand_vec_perm_1): Simplify and fix the logic preventing the xvshuf.* unpredictable behavior. gcc/testsuite/ * gcc.target/loongarch/pr122695-1.c: New test. * gcc.target/loongarch/pr122695-2.c: New test. --- gcc/config/loongarch/loongarch.cc | 56 ++----------------- .../gcc.target/loongarch/pr122695-1.c | 22 ++++++++ .../gcc.target/loongarch/pr122695-2.c | 22 ++++++++ 3 files changed, 50 insertions(+), 50 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr122695-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/pr122695-2.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index c02b5a18cc8..a06e29ffc7b 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -8934,57 +8934,13 @@ loongarch_expand_vec_perm_1 (rtx operands[]) /* Number of elements in the vector. */ w = GET_MODE_NUNITS (mode); - rtx round_data[MAX_VECT_LEN]; - rtx round_reg, round_data_rtx; - - if (mode != E_V32QImode) + /* If we are using xvshuf.*, clamp the selector to avoid unpredictable + output. */ + if (maskmode != V8SImode && maskmode != V4DImode) { - for (int i = 0; i < w; i += 1) - { - round_data[i] = GEN_INT (0x1f); - } - - if (mode == E_V4DFmode) - { - round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (E_V4DImode); - } - else if (mode == E_V8SFmode) - { - - round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (E_V8SImode); - } - else - { - round_data_rtx = gen_rtx_CONST_VECTOR (mode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (mode); - } - - emit_move_insn (round_reg, round_data_rtx); - switch (mode) - { - case E_V32QImode: - emit_insn (gen_andv32qi3 (mask, mask, round_reg)); - break; - case E_V16HImode: - emit_insn (gen_andv16hi3 (mask, mask, round_reg)); - break; - case E_V8SImode: - case E_V8SFmode: - emit_insn (gen_andv8si3 (mask, mask, round_reg)); - break; - case E_V4DImode: - case E_V4DFmode: - emit_insn (gen_andv4di3 (mask, mask, round_reg)); - break; - default: - gcc_unreachable (); - break; - } + rtx t = gen_const_vec_duplicate (maskmode, GEN_INT (0x1f)); + mask = expand_binop (maskmode, and_optab, mask, t, NULL_RTX, false, + OPTAB_DIRECT); } if (mode == V4DImode || mode == V4DFmode) diff --git a/gcc/testsuite/gcc.target/loongarch/pr122695-1.c b/gcc/testsuite/gcc.target/loongarch/pr122695-1.c new file mode 100644 index 00000000000..d7c906cc0ae --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr122695-1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mlasx" } */ +/* { dg-require-effective-target loongarch_asx_hw } */ + +[[gnu::vector_size (32)]] char a, b, c; + +[[gnu::noipa]] void +test (void) +{ + a = __builtin_shuffle (a, b, c); +} + +int +main (void) +{ + a = (typeof (a)){} + 5; + b = (typeof (a)){} + 6; + c = (typeof (a)){} + 64; + test (); + if (a[0] != 5) + __builtin_trap (); +} diff --git a/gcc/testsuite/gcc.target/loongarch/pr122695-2.c b/gcc/testsuite/gcc.target/loongarch/pr122695-2.c new file mode 100644 index 00000000000..79fc139b55f --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr122695-2.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mlasx" } */ +/* { dg-require-effective-target loongarch_asx_hw } */ + +[[gnu::vector_size(32)]] short a, b, c; + +[[gnu::noipa]] void +test (void) +{ + a = __builtin_shuffle(a, b, c) + c; +} + +int +main (void) +{ + a = (typeof (a)){} + 1; + b = (typeof (b)){} + 2; + c = (typeof (c)){} + 128; + test (); + if (a[0] != 129) + __builtin_trap (); +} -- Gitee From a27f2b823b25e12473396822e3e01f577c453687 Mon Sep 17 00:00:00 2001 From: zhaozhou Date: Fri, 14 Nov 2025 11:09:13 +0800 Subject: [PATCH 27/31] LoongArch: Fix operands[2] predicate of lsx_vreplvei_mirror. UNSPEC_LSX_VREPLVEI_MIRROR describes the mirroring operation that copies the lower 64 bits of a 128-bit register to the upper 64 bits. So in any mode, the value range of op2 can only be 0 or 1 for the vreplvei.d insn. gcc/ChangeLog: * config/loongarch/lsx.md: Fix predicate. --- gcc/config/loongarch/lsx.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index b1bccccb5e5..0246f2c3aab 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -1920,11 +1920,15 @@ [(set_attr "type" "simd_splat") (set_attr "mode" "")]) +;; UNSPEC_LSX_VREPLVEI_MIRROR describes the mirror operation that copies +;; the lower 64 bits of a 128-bit register to the upper 64 bits. It is only +;; called when the high half-part is the same as the low. + (define_insn "lsx_vreplvei_mirror_" [(set (match_operand:LSX 0 "register_operand" "=f") (unspec: LSX [(match_operand:LSX 1 "register_operand" "f") - (match_operand 2 "const__operand" "")] - UNSPEC_LSX_VREPLVEI_MIRROR))] + (match_operand 2 "const_0_or_1_operand" "")] + UNSPEC_LSX_VREPLVEI_MIRROR))] "ISA_HAS_LSX" "vreplvei.d\t%w0,%w1,%2" [(set_attr "type" "simd_splat") -- Gitee From 46ba635a9ffeba378d67d739795bf95c8c710bdd Mon Sep 17 00:00:00 2001 From: DengJianbo Date: Tue, 13 Jan 2026 15:21:54 +0800 Subject: [PATCH 28/31] LoongArch: Fix ICE when explicit-relocs is none When set -mexplicit-relocs=none, the symbol address should be caclulated by macro instructions, for example la.local. Due to the condition TARGET_CMODEL_EXTREME in movdi_symbolic_off64, this template can not be matched in case the cmodel is normal. If the variable has attribute model("extreme"), gcc will get crashed with error unrecognizable insns. This patch fix this issue by removing TARGET_CMODEL_EXTREME, since it already checked in prediction symbolic_off64_or_reg_operand. gcc/ChangeLog: * config/loongarch/loongarch.md: Remove condition in template movdi_symbolic_off64. gcc/testsuite/ChangeLog: * gcc.target/loongarch/la64/attr-model-6.c: New test. --- gcc/config/loongarch/loongarch.md | 2 +- gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index fd59ec9225e..86947547bbe 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -2256,7 +2256,7 @@ (unspec:DI [(const_int 0)] UNSPEC_LOAD_SYMBOL_OFFSET64) (clobber (match_operand:DI 2 "register_operand" "=&r,r"))] - "TARGET_64BIT && TARGET_CMODEL_EXTREME" + "TARGET_64BIT" { if (which_alternative == 1) return "#"; diff --git a/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c b/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c new file mode 100644 index 00000000000..9fa32a67bf4 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-mexplicit-relocs=none -mcmodel=normal -O2 -fno-pic" } */ +/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,x" } } */ +/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,y" } } */ +/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,counter" } } */ + +#define ATTR_MODEL_TEST +#include "attr-model-test.c" -- Gitee From 957b49327725d31979ab42fb1685e040dafa849d Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Mon, 12 Jan 2026 17:23:41 +0800 Subject: [PATCH 29/31] LoongArch: Fix bug123521. In the vector initialization process, optimization can be performed if it can be determined that all elements are the same, or if the upper or lower halves are identical. However, during this optimization, when the identical element is an immediate value larger than 10 bits, an internal compiler error (ICE) occurs. The reason is that in such cases, the function `simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0)` is called, where `imode` is `E_DImode`. The mode of `reg_tmp` in `GET_MODE (reg_tmp)` is taken from the immediate value's mode, which is `E_VOIDmode`. This results in a move from `E_VOIDmode` to `E_DImode`, an operation not supported by LoongArch. PR target/123521 gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_expand_vector_init_same): Fixed a bug in the vector initialization section.. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lasx/pr123521.c: New test. Suggested-by: Xi Ruoyao --- gcc/config/loongarch/loongarch.cc | 27 +------------------ .../loongarch/vector/lasx/pr123521.c | 11 ++++++++ 2 files changed, 12 insertions(+), 26 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index a06e29ffc7b..16b9f5b870b 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -10150,32 +10150,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar) } } - if (imode == GET_MODE (same)) - temp = same; - else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD) - { - if (GET_CODE (same) == MEM) - { - rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); - loongarch_emit_move (reg_tmp, same); - temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0); - } - else - temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0); - } - else - { - if (GET_CODE (same) == MEM) - { - rtx reg_tmp = gen_reg_rtx (GET_MODE (same)); - loongarch_emit_move (reg_tmp, same); - temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp)); - } - else - temp = lowpart_subreg (imode, same, GET_MODE (same)); - } - - temp = force_reg (imode, temp); + temp = force_reg (imode, same); switch (vmode) { diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c new file mode 100644 index 00000000000..9ccf391d38d --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -msimd=lasx" } */ + +typedef long long v4i64 __attribute__ ((vector_size (32), aligned (32))); +extern long long *x_si; +v4i64 +test (void) +{ + v4i64 a = { x_si[1], x_si[0], 0x411, 0x411 }; + return a; +} -- Gitee From 869d6e0ea703acb8f0b4e0a2b6e5df11539094ec Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Sat, 17 Jan 2026 15:12:46 +0800 Subject: [PATCH 30/31] LoongArch: Fix bug117575. In the template "vec_set", a call is made to "lasx_xvinsve0__scalar", but there is an issue due to the different ranges of operand1 between the two templates. The range of operand1 in the template "lasx_xvinsve0__scalar" is now set to be the same as that in "vec_set". PR target/117575 gcc/ChangeLog: * config/loongarch/lasx.md: Modify the range of operand1. gcc/testsuite/ChangeLog: * g++.target/loongarch/pr117575.C: New test. --- gcc/config/loongarch/lasx.md | 2 +- gcc/testsuite/g++.target/loongarch/pr117575.C | 81 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/loongarch/pr117575.C diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 15142f8949e..c2aefea273e 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -4524,7 +4524,7 @@ [(set (match_operand:FLASX 0 "register_operand" "=f") (vec_merge:FLASX (vec_duplicate:FLASX - (match_operand: 1 "register_operand" "f")) + (match_operand: 1 "reg_or_0_operand" "f")) (match_operand:FLASX 2 "register_operand" "0") (match_operand 3 "const__operand" "")))] "ISA_HAS_LASX" diff --git a/gcc/testsuite/g++.target/loongarch/pr117575.C b/gcc/testsuite/g++.target/loongarch/pr117575.C new file mode 100644 index 00000000000..1870d4c890a --- /dev/null +++ b/gcc/testsuite/g++.target/loongarch/pr117575.C @@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mlasx" } */ + +typedef long unsigned int size_t; +typedef unsigned char simde__mmask8; +typedef double simde_float64; +typedef simde_float64 simde__m512d __attribute__ ((__aligned__ ((64)))) +__attribute__ ((__vector_size__ (64))) __attribute__ ((__may_alias__)); +typedef simde_float64 simde__m256d __attribute__ ((__aligned__ ((32)))) +__attribute__ ((__vector_size__ (32))) __attribute__ ((__may_alias__)); +simde__m512d simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, + simde_float64 e5, simde_float64 e4, + simde_float64 e3, simde_float64 e2, + simde_float64 e1, simde_float64 e0); +simde__m256d simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a); +int simde_test_x86_assert_equal_f64x4_ (simde__m256d a, simde__m256d b); + +typedef union +{ + + __attribute__ ((__aligned__ ((32)))) simde_float64 f64 + __attribute__ ((__vector_size__ (32))) __attribute__ ((__may_alias__)); +} simde__m256d_private; +__attribute__ ((__always_inline__)) inline static simde__m256d +simde__m256d_from_private (simde__m256d_private v) +{ + simde__m256d r; + __builtin_memcpy (&r, &v, sizeof (r)); + return r; +} +simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, + simde_float64 e0) +{ + + simde__m256d_private r_; + + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + + return simde__m256d_from_private (r_); +} + +simde__m256d simde_mm512_extractf64x4_pd (simde__m512d a, int imm8); +int +test_simde_mm512_maskz_extractf64x4_pd (void) +{ + const struct + { + simde__mmask8 k; + simde__m512d a; + simde__m256d r0; + simde__m256d r1; + } test_vec[2] = { + { 21, + simde_mm512_set_pd (-139.11, -172.36, -268.86, 393.53, -71.72, 36.69, + 98.47, -135.52), + simde_mm256_set_pd (0.00, 36.69, 0.00, -135.52), + simde_mm256_set_pd (0.00, -172.36, 0.00, 393.53) }, + { 150, + simde_mm512_set_pd (-556.90, 522.06, 160.98, -932.28, 391.82, 600.12, + -569.99, -491.12), + simde_mm256_set_pd (0.00, 600.12, -569.99, 0.00), + simde_mm256_set_pd (0.00, 522.06, 160.98, 0.00) }, + }; + + for (size_t i = 0; i < (sizeof (test_vec) / sizeof (test_vec[0])); i++) + { + simde__m256d r; + r = simde_mm256_maskz_mov_pd ( + test_vec[i].k, simde_mm512_extractf64x4_pd (test_vec[i].a, 0)); + if (simde_test_x86_assert_equal_f64x4_ (r, test_vec[i].r0)) + { + return 1; + } + } + + return 0; +} -- Gitee From 15f0a0168f691764f0fa6e8108ca7d8b109b3bca Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Tue, 27 Jan 2026 10:31:36 +0800 Subject: [PATCH 31/31] LoongArch: Fix bug123766. The pointer parameter type for the original store class builtin functions is CVPOINTER (const volatile void *). Taking the following test as an example: ``` v4i64 v = {0, 0, 0, 0}; void try_store() { long r[4]; __lasx_xvst(v, r, 0); } ``` At this point, the type of r is CVPOINTER, which means data in memory can only be read through r. Therefore, if the array r is not initialized, an uninitialized warning will be issued. This patch changes the pointer type of store-class builtin functions from CVPOINTER to VPOINTER (volatile void *). PR target/123766 gcc/ChangeLog: * config/loongarch/loongarch-builtins.cc (loongarch_build_vpointer_type): New function. Return a type for 'volatile void *'. (LARCH_ATYPE_VPOINTER): New macro. * config/loongarch/loongarch-ftypes.def: Change the pointer type of the store class function from CVPOINTER to VPOINTER. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lasx/pr123766.c: New test. * gcc.target/loongarch/vector/lsx/pr123766.c: New test. --- gcc/config/loongarch/loongarch-builtins.cc | 38 +++++++++++++------ gcc/config/loongarch/loongarch-ftypes.def | 24 ++++++------ .../loongarch/vector/lasx/pr123766.c | 28 ++++++++++++++ .../loongarch/vector/lsx/pr123766.c | 28 ++++++++++++++ 4 files changed, 94 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 5d06a4d88c2..5edbea34544 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -1408,10 +1408,10 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LSX_BUILTIN (vfrintrp_d, LARCH_V2DF_FTYPE_V2DF), LSX_BUILTIN (vfrintrm_s, LARCH_V4SF_FTYPE_V4SF), LSX_BUILTIN (vfrintrm_d, LARCH_V2DF_FTYPE_V2DF), - LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI_UQI), - LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_CVPOINTER_SI_UQI), - LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_CVPOINTER_SI_UQI), - LSX_NO_TARGET_BUILTIN (vstelm_d, LARCH_VOID_FTYPE_V2DI_CVPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_VPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_VPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_VPOINTER_SI_UQI), + LSX_NO_TARGET_BUILTIN (vstelm_d, LARCH_VOID_FTYPE_V2DI_VPOINTER_SI_UQI), LSX_BUILTIN (vaddwev_d_w, LARCH_V2DI_FTYPE_V4SI_V4SI), LSX_BUILTIN (vaddwev_w_h, LARCH_V4SI_FTYPE_V8HI_V8HI), LSX_BUILTIN (vaddwev_h_b, LARCH_V8HI_FTYPE_V16QI_V16QI), @@ -1581,7 +1581,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LSX_BUILTIN (vssrarni_du_q, LARCH_UV2DI_FTYPE_UV2DI_V2DI_USI), LSX_BUILTIN (vpermi_w, LARCH_V4SI_FTYPE_V4SI_V4SI_USI), LSX_BUILTIN (vld, LARCH_V16QI_FTYPE_CVPOINTER_SI), - LSX_NO_TARGET_BUILTIN (vst, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI), + LSX_NO_TARGET_BUILTIN (vst, LARCH_VOID_FTYPE_V16QI_VPOINTER_SI), LSX_BUILTIN (vssrlrn_b_h, LARCH_V16QI_FTYPE_V8HI_V8HI), LSX_BUILTIN (vssrlrn_h_w, LARCH_V8HI_FTYPE_V4SI_V4SI), LSX_BUILTIN (vssrlrn_w_d, LARCH_V4SI_FTYPE_V2DI_V2DI), @@ -1592,7 +1592,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LSX_BUILTIN (vldi, LARCH_V2DI_FTYPE_HI), LSX_BUILTIN (vshuf_b, LARCH_V16QI_FTYPE_V16QI_V16QI_V16QI), LSX_BUILTIN (vldx, LARCH_V16QI_FTYPE_CVPOINTER_DI), - LSX_NO_TARGET_BUILTIN (vstx, LARCH_VOID_FTYPE_V16QI_CVPOINTER_DI), + LSX_NO_TARGET_BUILTIN (vstx, LARCH_VOID_FTYPE_V16QI_VPOINTER_DI), LSX_BUILTIN (vextl_qu_du, LARCH_UV2DI_FTYPE_UV2DI), /* Built-in functions for LASX */ @@ -2121,11 +2121,11 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LASX_BUILTIN (xvfrintrm_s, LARCH_V8SF_FTYPE_V8SF), LASX_BUILTIN (xvfrintrm_d, LARCH_V4DF_FTYPE_V4DF), LASX_BUILTIN (xvld, LARCH_V32QI_FTYPE_CVPOINTER_SI), - LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI), - LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI_UQI), - LASX_NO_TARGET_BUILTIN (xvstelm_h, LARCH_VOID_FTYPE_V16HI_CVPOINTER_SI_UQI), - LASX_NO_TARGET_BUILTIN (xvstelm_w, LARCH_VOID_FTYPE_V8SI_CVPOINTER_SI_UQI), - LASX_NO_TARGET_BUILTIN (xvstelm_d, LARCH_VOID_FTYPE_V4DI_CVPOINTER_SI_UQI), + LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_VPOINTER_SI), + LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_VPOINTER_SI_UQI), + LASX_NO_TARGET_BUILTIN (xvstelm_h, LARCH_VOID_FTYPE_V16HI_VPOINTER_SI_UQI), + LASX_NO_TARGET_BUILTIN (xvstelm_w, LARCH_VOID_FTYPE_V8SI_VPOINTER_SI_UQI), + LASX_NO_TARGET_BUILTIN (xvstelm_d, LARCH_VOID_FTYPE_V4DI_VPOINTER_SI_UQI), LASX_BUILTIN (xvinsve0_w, LARCH_V8SI_FTYPE_V8SI_V8SI_UQI), LASX_BUILTIN (xvinsve0_d, LARCH_V4DI_FTYPE_V4DI_V4DI_UQI), LASX_BUILTIN (xvpickve_w, LARCH_V8SI_FTYPE_V8SI_UQI), @@ -2141,7 +2141,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LASX_BUILTIN (xvorn_v, LARCH_UV32QI_FTYPE_UV32QI_UV32QI), LASX_BUILTIN (xvldi, LARCH_V4DI_FTYPE_HI), LASX_BUILTIN (xvldx, LARCH_V32QI_FTYPE_CVPOINTER_DI), - LASX_NO_TARGET_BUILTIN (xvstx, LARCH_VOID_FTYPE_V32QI_CVPOINTER_DI), + LASX_NO_TARGET_BUILTIN (xvstx, LARCH_VOID_FTYPE_V32QI_VPOINTER_DI), LASX_BUILTIN (xvextl_qu_du, LARCH_UV4DI_FTYPE_UV4DI), /* LASX */ @@ -2391,11 +2391,25 @@ loongarch_build_cvpointer_type (void) return cache; } +/* Return a type for 'volatile void *'. */ + +static tree +loongarch_build_vpointer_type (void) +{ + static tree cache; + + if (cache == NULL_TREE) + cache = build_pointer_type (build_qualified_type (void_type_node, + TYPE_QUAL_VOLATILE)); + return cache; +} + /* Source-level argument types. */ #define LARCH_ATYPE_VOID void_type_node #define LARCH_ATYPE_INT integer_type_node #define LARCH_ATYPE_POINTER ptr_type_node #define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type () +#define LARCH_ATYPE_VPOINTER loongarch_build_vpointer_type () #define LARCH_ATYPE_BOOLEAN boolean_type_node /* Standard mode-based argument types. */ #define LARCH_ATYPE_QI intQI_type_node diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def index c7f849e8805..ee6051ee1c4 100644 --- a/gcc/config/loongarch/loongarch-ftypes.def +++ b/gcc/config/loongarch/loongarch-ftypes.def @@ -497,10 +497,10 @@ DEF_LARCH_FTYPE (2, (VOID, SI, SI)) DEF_LARCH_FTYPE (2, (VOID, UQI, SI)) DEF_LARCH_FTYPE (2, (VOID, USI, UQI)) DEF_LARCH_FTYPE (1, (VOID, UHI)) -DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, SI)) -DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, DI)) -DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, SI)) -DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, DI)) +DEF_LARCH_FTYPE (3, (VOID, V16QI, VPOINTER, SI)) +DEF_LARCH_FTYPE (3, (VOID, V16QI, VPOINTER, DI)) +DEF_LARCH_FTYPE (3, (VOID, V32QI, VPOINTER, SI)) +DEF_LARCH_FTYPE (3, (VOID, V32QI, VPOINTER, DI)) DEF_LARCH_FTYPE (3, (VOID, V4DF, POINTER, SI)) DEF_LARCH_FTYPE (3, (VOID, V2DF, POINTER, SI)) DEF_LARCH_FTYPE (3, (VOID, V2DI, CVPOINTER, SI)) @@ -682,10 +682,10 @@ DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV16HI,V16HI)) DEF_LARCH_FTYPE(2,(UV8SI,UV32QI,UV32QI)) DEF_LARCH_FTYPE(2,(V8SI,UV32QI,V32QI)) -DEF_LARCH_FTYPE(4,(VOID,V16QI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V8HI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V4SI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V2DI,CVPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V16QI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V8HI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V4SI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V2DI,VPOINTER,SI,UQI)) DEF_LARCH_FTYPE (2, (DI, V16QI, UQI)) DEF_LARCH_FTYPE (2, (DI, V8HI, UQI)) @@ -707,10 +707,10 @@ DEF_LARCH_FTYPE (3, (UV16HI, UV16HI, V16HI, USI)) DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, V8SI, USI)) DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, V4DI, USI)) -DEF_LARCH_FTYPE(4,(VOID,V32QI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V16HI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V8SI,CVPOINTER,SI,UQI)) -DEF_LARCH_FTYPE(4,(VOID,V4DI,CVPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V32QI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V16HI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V8SI,VPOINTER,SI,UQI)) +DEF_LARCH_FTYPE(4,(VOID,V4DI,VPOINTER,SI,UQI)) DEF_LARCH_FTYPE (1, (BOOLEAN,V16QI)) DEF_LARCH_FTYPE(2,(V16QI,CVPOINTER,CVPOINTER)) diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c new file mode 100644 index 00000000000..88170cd6aef --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlasx -Werror -Wextra" } */ + +#include + +__m256i v = {0, 0, 0, 0}; + +#define TEST(NAME) \ + void test_##NAME (void) \ + { \ + long long r[4]; \ + __lasx_##NAME (v, r, 0); \ + } + +TEST (xvst); +TEST (xvstx); + +#define TEST1(NAME, TYPE, NUM) \ + void test_##NAME (void) \ + { \ + TYPE r[NUM]; \ + __lasx_##NAME (v, r, 0, 0); \ + } + +TEST1 (xvstelm_b, char, 32); +TEST1 (xvstelm_h, short, 16); +TEST1 (xvstelm_w, int, 8); +TEST1 (xvstelm_d, long long, 4); diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c new file mode 100644 index 00000000000..54f57e61113 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlsx -Werror -Wextra" } */ + +#include + +__m128i v = {0, 0}; + +#define TEST(NAME) \ + void test_##NAME (void) \ + { \ + long long r[2]; \ + __lsx_##NAME (v, r, 0); \ + } + +TEST (vst); +TEST (vstx); + +#define TEST1(NAME, TYPE, NUM) \ + void test_##NAME (void) \ + { \ + TYPE r[NUM]; \ + __lsx_##NAME (v, r, 0, 0); \ + } + +TEST1 (vstelm_b, char, 16); +TEST1 (vstelm_h, short, 8); +TEST1 (vstelm_w, int, 4); +TEST1 (vstelm_d, long long, 2); -- Gitee