From 2bbc0301d5c7127cbd45b88c63ed946a47a2aacd Mon Sep 17 00:00:00 2001
From: Peng Fan <fanpeng@loongson.cn>
Date: Mon, 3 Nov 2025 01:12:57 +0000
Subject: [PATCH 01/31] LoongArch: Implement NOCE_CONVERSION_PROFITABLE_P to be
 compatible

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
---
 gcc/config/loongarch/loongarch.cc | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index c90fc726f05..85ea8b6f348 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -11177,6 +11177,20 @@ loongarch_c_mode_for_suffix (char suffix)
   return VOIDmode;
 }
 
+static bool
+loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
+{
+    enum rtx_class cls;
+      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
+    {
+       cls = GET_RTX_CLASS (GET_CODE (SET_SRC (single_set (insn))));
+      if ( cls == RTX_COMPARE || cls == RTX_COMM_COMPARE)
+    return false;
+    }
+
+  return default_noce_conversion_profitable_p (seq, if_info);
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11448,6 +11462,9 @@ loongarch_c_mode_for_suffix (char suffix)
 #undef TARGET_C_MODE_FOR_SUFFIX
 #define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix
 
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P loongarch_noce_conversion_profitable_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-loongarch.h"
-- 
Gitee


From 4aff955340f5a7502e752b8adc67284434dc61d7 Mon Sep 17 00:00:00 2001
From: Jinyang He <hejinyang@loongson.cn>
Date: Wed, 29 Oct 2025 16:07:35 +0800
Subject: [PATCH 02/31] LoongArch: Only allow valid binary op when optimize
 conditional move

It is wrong that optimize from `if (cond) dest op= 1 << shift` to
`dest op= (cond ? 1 : 0) << shift` when `dest op 0 != dest`.
Like `and`, `mul` or `div`.
And in this optimization `mul` and `div` is optimized to shift.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_expand_conditional_move): Only allow valid binary
	op when optimize conditional move.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/conditional-move-opt-1.c: Remove mul.
	* gcc.target/loongarch/conditional-move-opt-2.c: Remove and.
	* gcc.target/loongarch/conditional-move-opt-3.c: New test.

Co-Authored-By: Peng Fan <fanpeng@loongson.cn>
---
 gcc/config/loongarch/loongarch.cc             | 26 ++++++++++++++++---
 .../loongarch/conditional-move-opt-1.c        |  4 +--
 .../loongarch/conditional-move-opt-2.c        |  2 +-
 .../loongarch/conditional-move-opt-3.c        | 14 ++++++++++
 4 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 85ea8b6f348..b44d52adff1 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5426,12 +5426,32 @@ loongarch_expand_conditional_move (rtx *operands)
 	    }
 	}
 
+      auto is_binary_op_0_keep_orig = [](enum rtx_code code)
+	{
+	  switch (code)
+	    {
+	    case PLUS:
+	    case MINUS:
+	    case IOR:
+	    case XOR:
+	    case ROTATE:
+	    case ROTATERT:
+	    case ASHIFT:
+	    case ASHIFTRT:
+	    case LSHIFTRT:
+	      return true;
+	    default:
+	      return false;
+	    }
+	};
+
       /* Check if the optimization conditions are met.  */
       if (value_if_true_insn
 	  && value_if_false_insn
-	  /* Make sure that value_if_false and var are the same.  */
-	  && BINARY_P (value_if_true_insn_src
-		       = SET_SRC (single_set (value_if_true_insn)))
+	  /* Make sure that the orig value OP 0 keep orig.  */
+	  && (value_if_true_insn_src
+	      = SET_SRC (single_set (value_if_true_insn)))
+	  && is_binary_op_0_keep_orig ( GET_CODE (value_if_true_insn_src))
 	  /* Make sure that both value_if_true and value_if_false
 	     has the same var.  */
 	  && rtx_equal_p (XEXP (value_if_true_insn_src, 0),
diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c
index ed13471aa90..47802aa9688 100644
--- a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-1.c
@@ -27,7 +27,7 @@ void
 test_lt ()
 {
   if (lm < ln)
-    lr *= (1 << 16);
+    lr += (1 << 16);
   lr += lm;
 }
 
@@ -35,7 +35,7 @@ void
 test_le ()
 {
   if (lm <= ln)
-    lr = lm * ((long)1 << 32);
+    lr = lm + ((long)1 << 32);
   else
     lr = lm;
   lr += lm;
diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c
index ac72d4d933a..743fd5e670e 100644
--- a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-2.c
@@ -29,7 +29,7 @@ void
 test_lez ()
 {
   if (lm <= 0)
-    lr &= (1 << 16);
+    lr |= (1 << 16);
   lr += lm;
 }
 
diff --git a/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c
new file mode 100644
index 00000000000..95887980cc5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/conditional-move-opt-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "maskeqz" } } */
+/* { dg-final { scan-assembler "masknez" } } */
+
+extern long lm, ln, lr;
+
+void
+test_and ()
+{
+  if (lm < 0)
+    lr &= (1 << 16);
+  lr += lm;
+}
-- 
Gitee


From 3261043e01f17c4175b39bd3937d01a68bc326f9 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 8 Jul 2025 14:39:11 +0800
Subject: [PATCH 03/31] lra: Reallow reloading user hard registers if the insn
 is not asm [PR 120983]

The PR 87600 fix has disallowed reloading user hard registers to resolve
earlyclobber-induced conflict.

However before reload, recog completely ignores the constraints of
insns, so the RTL passes may produce insns where some user hard
registers violate an earlyclobber.  Then we'll get an ICE without
reloading them, like what we are recently encountering in LoongArch test
suite.

IIUC "recog does not look at constraints until reload" has been a
well-established rule in GCC for years and I don't have enough skill to
challange it.  So reallow reloading user hard registers (but still
disallow doing so for asm) to fix the ICE.

gcc/ChangeLog:

	PR rtl-optimization/120983
	* lra-constraints.cc (process_alt_operands): Allow reloading
	user hard registers unless the insn is an asm.
---
 gcc/lra-constraints.cc | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index d92ab76908c..6a003d763a6 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -2304,14 +2304,15 @@ process_alt_operands (int only_alternative)
 			if (curr_static_id->operand[nop].type == OP_INOUT
 			    || curr_static_id->operand[m].type == OP_INOUT)
 			  break;
-			/* Operands don't match.  If the operands are
-			   different user defined explicit hard
+			/* Operands don't match.  For asm if the operands
+			   are different user defined explicit hard
 			   registers, then we cannot make them match
 			   when one is early clobber operand.  */
 			if ((REG_P (*curr_id->operand_loc[nop])
 			     || SUBREG_P (*curr_id->operand_loc[nop]))
 			    && (REG_P (*curr_id->operand_loc[m])
-				|| SUBREG_P (*curr_id->operand_loc[m])))
+				|| SUBREG_P (*curr_id->operand_loc[m]))
+			    && INSN_CODE (curr_insn) < 0)
 			  {
 			    rtx nop_reg = *curr_id->operand_loc[nop];
 			    if (SUBREG_P (nop_reg))
@@ -3129,19 +3130,15 @@ process_alt_operands (int only_alternative)
 		  first_conflict_j = j;
 		last_conflict_j = j;
 		/* Both the earlyclobber operand and conflicting operand
-		   cannot both be user defined hard registers.  */
+		   cannot both be user defined hard registers for asm.
+		   Let curr_insn_transform diagnose it.  */
 		if (HARD_REGISTER_P (operand_reg[i])
 		    && REG_USERVAR_P (operand_reg[i])
 		    && operand_reg[j] != NULL_RTX
 		    && HARD_REGISTER_P (operand_reg[j])
-		    && REG_USERVAR_P (operand_reg[j]))
-		  {
-		    /* For asm, let curr_insn_transform diagnose it.  */
-		    if (INSN_CODE (curr_insn) < 0)
+		    && REG_USERVAR_P (operand_reg[j])
+		    && INSN_CODE (curr_insn) < 0)
 		      return false;
-		    fatal_insn ("unable to generate reloads for "
-				"impossible constraints:", curr_insn);
-		  }
 	      }
 	  if (last_conflict_j < 0)
 	    continue;
-- 
Gitee


From a7e2ee8841d8213b09be9b3413cfda33158a18b5 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 15 Jul 2025 03:01:12 +0800
Subject: [PATCH 04/31] LoongArch: Fix wrong code generated by
 TARGET_VECTORIZE_VEC_PERM_CONST [PR121064]

When TARGET_VECTORIZE_VEC_PERM_CONST is called, target may be the
same pseudo as op0 and/or op1.  Loading the selector into target
would clobber the input, producing wrong code like

    vld     $vr0, $t0
    vshuf.w $vr0, $vr0, $vr1

So don't load the selector into d->target, use a new pseudo to hold the
selector instead.  The reload pass will load the pseudo for selector and
the pseudo for target into the same hard register (following our
constraint '0' on the shuf instructions) anyway.

gcc/ChangeLog:

	PR target/121064
	* config/loongarch/lsx.md (lsx_vshuf_<lsxfmt_f>): Add '@' to
	generate a mode-aware helper.  Use <VIMODE> as the mode of the
	operand 1 (selector).
	* config/loongarch/lasx.md (lasx_xvshuf_<lasxfmt_f>): Likewise.
	* config/loongarch/loongarch.cc
	(loongarch_try_expand_lsx_vshuf_const): Create a new pseudo for
	the selector.  Use the mode-aware helper to simplify the code.
	(loongarch_expand_vec_perm_const): Likewise.

gcc/testsuite/ChangeLog:

	PR target/121064
	* gcc.target/loongarch/pr121064.c: New test.
---
 gcc/config/loongarch/lasx.md                  |   4 +-
 gcc/config/loongarch/loongarch.cc             | 126 +++++-------------
 gcc/config/loongarch/lsx.md                   |   4 +-
 gcc/testsuite/gcc.target/loongarch/pr121064.c |  38 ++++++
 4 files changed, 73 insertions(+), 99 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121064.c

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 73ca7caaaa0..15142f8949e 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2156,9 +2156,9 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "lasx_xvshuf_<lasxfmt_f>"
+(define_insn "@lasx_xvshuf_<lasxfmt_f>"
   [(set (match_operand:LASX_DWH 0 "register_operand" "=f")
-	(unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0")
+	(unspec:LASX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
 			  (match_operand:LASX_DWH 2 "register_operand" "f")
 			  (match_operand:LASX_DWH 3 "register_operand" "f")]
 			UNSPEC_LASX_XVSHUF))]
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index b44d52adff1..883db986aae 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -8390,7 +8390,7 @@ static bool
 loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
 {
   int i;
-  rtx target, op0, op1, sel, tmp;
+  rtx target, op0, op1;
   rtx rperm[MAX_VECT_LEN];
 
   if (GET_MODE_SIZE (d->vmode) == 16)
@@ -8409,47 +8409,23 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
       for (i = 0; i < d->nelt; i += 1)
 	  rperm[i] = GEN_INT (d->perm[i]);
 
-      if (d->vmode == E_V2DFmode)
-	{
-	  sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
-	  tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
-	  emit_move_insn (tmp, sel);
-	}
-      else if (d->vmode == E_V4SFmode)
-	{
-	  sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
-	  tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
-	  emit_move_insn (tmp, sel);
-	}
+      machine_mode sel_mode = related_int_vector_mode (d->vmode)
+	.require ();
+      rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+      /* Despite vshuf.* (except vshuf.b) needs sel == target, we cannot
+	 load sel into target right now: here we are dealing with
+	 pseudo regs, and target may be the same pseudo as one of op0
+	 or op1.  Then we'd clobber the input.  Instead, we use a new
+	 pseudo reg here.  The reload pass will look at the constraint
+	 of vshuf.* and move sel into target first if needed.  */
+      rtx sel = force_reg (sel_mode,
+			   gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+      if (d->vmode == E_V16QImode)
+	emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
       else
-	{
-	  sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
-	  emit_move_insn (d->target, sel);
-	}
-
-      switch (d->vmode)
-	{
-	case E_V2DFmode:
-	  emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
-	  break;
-	case E_V2DImode:
-	  emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
-	  break;
-	case E_V4SFmode:
-	  emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
-	  break;
-	case E_V4SImode:
-	  emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
-	  break;
-	case E_V8HImode:
-	  emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
-	  break;
-	case E_V16QImode:
-	  emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
-	  break;
-	default:
-	  break;
-	}
+	emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0));
 
       return true;
     }
@@ -9445,7 +9421,7 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
   bool flag = false;
   unsigned int i;
   unsigned char idx;
-  rtx target, op0, op1, sel, tmp;
+  rtx target, op0, op1;
   rtx rperm[MAX_VECT_LEN];
   unsigned int remapped[MAX_VECT_LEN];
   unsigned char perm2[MAX_VECT_LEN];
@@ -9625,63 +9601,23 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
 expand_perm_const_end:
       if (flag)
 	{
-	  /* Copy selector vector from memory to vector register for later insn
-	     gen function.
-	     If vector's element in floating point value, we cannot fit
-	     selector argument into insn gen function directly, because of the
-	     insn template definition.  As a solution, generate a integral mode
-	     subreg of target, then copy selector vector (that is in integral
-	     mode) to this subreg.  */
-	  switch (d->vmode)
-	    {
-	    case E_V4DFmode:
-	      sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
-								   rperm));
-	      tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
-	      emit_move_insn (tmp, sel);
-	      break;
-	    case E_V8SFmode:
-	      sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
-								   rperm));
-	      tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
-	      emit_move_insn (tmp, sel);
-	      break;
-	    default:
-	      sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
-								 rperm));
-	      emit_move_insn (d->target, sel);
-	      break;
-	    }
-
 	  target = d->target;
 	  op0 = d->op0;
 	  op1 = d->one_vector_p ? d->op0 : d->op1;
 
-	  /* We FINALLY can generate xvshuf.* insn.  */
-	  switch (d->vmode)
-	    {
-	    case E_V4DFmode:
-	      emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
-	      break;
-	    case E_V4DImode:
-	      emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
-	      break;
-	    case E_V8SFmode:
-	      emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
-	      break;
-	    case E_V8SImode:
-	      emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
-	      break;
-	    case E_V16HImode:
-	      emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
-	      break;
-	    case E_V32QImode:
-	      emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
-	      break;
-	    default:
-	      gcc_unreachable ();
-	      break;
-	    }
+	  machine_mode sel_mode = related_int_vector_mode (d->vmode)
+	    .require ();
+	  rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+	  /* See the comment in loongarch_expand_lsx_shuffle for why
+	     we don't simply use a SUBREG to pun target.  */
+	  rtx sel = force_reg (sel_mode,
+			       gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+	  if (d->vmode == E_V32QImode)
+	    emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel));
+	  else
+	    emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0));
 
 	  return true;
 	}
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 8cf7ad917ec..b1bccccb5e5 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -561,9 +561,9 @@
   DONE;
 })
 
-(define_insn "lsx_vshuf_<lsxfmt_f>"
+(define_insn "@lsx_vshuf_<lsxfmt_f>"
   [(set (match_operand:LSX_DWH 0 "register_operand" "=f")
-	(unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0")
+	(unspec:LSX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
 			 (match_operand:LSX_DWH 2 "register_operand" "f")
 			 (match_operand:LSX_DWH 3 "register_operand" "f")]
 			UNSPEC_LSX_VSHUF))]
diff --git a/gcc/testsuite/gcc.target/loongarch/pr121064.c b/gcc/testsuite/gcc.target/loongarch/pr121064.c
new file mode 100644
index 00000000000..a466c7abc70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr121064.c
@@ -0,0 +1,38 @@
+/* { dg-require-effective-target loongarch_sx_hw } */
+/* { dg-do run } */
+/* { dg-options "-march=loongarch64 -mfpu=64 -mlsx -O3" } */
+
+typedef __INT32_TYPE__ int32_t;
+typedef unsigned __INT32_TYPE__ uint32_t;
+
+__attribute__ ((noipa)) static int32_t
+long_filter_ehigh_3830_1 (int32_t *buffer, int length)
+{
+  int i, j;
+  int32_t dotprod = 0;
+  int32_t delay[4] = { 0 };
+  uint32_t coeffs[4] = { 0 };
+
+  for (i = 0; i < length; i++)
+    {
+      dotprod = 0;
+      for (j = 3; j >= 0; j--)
+        {
+          dotprod += delay[j] * coeffs[j];
+          coeffs[j] += ((delay[j] >> 31) | 1);
+        }
+      for (j = 3; j > 0; j--)
+        delay[j] = delay[j - 1];
+      delay[0] = buffer[i];
+    }
+
+  return dotprod;
+}
+
+int
+main ()
+{
+  int32_t buffer[] = { -1, 1 };
+  if (long_filter_ehigh_3830_1 (buffer, 2) != -1)
+    __builtin_trap ();
+}
-- 
Gitee


From dfc0fd2eec82d5363f0a331cd71a52a5ec682ece Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 24 Jul 2025 19:07:25 +0800
Subject: [PATCH 05/31] LoongArch: Remove the definition of
 CASE_VECTOR_SHORTEN_MODE.

On LoongArch, the switch jump-table always stores absolute
addresses, so there is no need to define the macro
CASE_VECTOR_SHORTEN_MODE.

gcc/ChangeLog:

	* config/loongarch/loongarch.h
	(CASE_VECTOR_SHORTEN_MODE): Delete.
---
 gcc/config/loongarch/loongarch.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 10b5a0b0f32..62307b94b53 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -824,8 +824,6 @@ typedef struct {
 
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode
-
 /* Define this as 1 if `char' should by default be signed; else as 0.  */
 #ifndef DEFAULT_SIGNED_CHAR
 #define DEFAULT_SIGNED_CHAR 1
-- 
Gitee


From 8ce0be51b32c791248c2f16156562a1c4ffd34ba Mon Sep 17 00:00:00 2001
From: mengqinggang <mengqinggang@loongson.cn>
Date: Fri, 8 Aug 2025 16:22:59 +0800
Subject: [PATCH 06/31] LoongArch: macro instead enum for base abi type

enum can't be used in #if.
For #if expression, identifiers that are not macros,
which are all considered to be the number zero.

This patch may fix https://sourceware.org/bugzilla/show_bug.cgi?id=32776.

gcc/ChangeLog:

	* config/loongarch/loongarch-def.h (ABI_BASE_LP64D): New macro.
	(ABI_BASE_LP64F): New macro.
	(ABI_BASE_LP64S): New macro.
	(N_ABI_BASE_TYPES): New macro.
---
 gcc/config/loongarch/loongarch-def.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index 10b5f9ddc01..06eb107d391 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -78,12 +78,10 @@ extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
 
 
 /* Base ABI */
-enum {
-  ABI_BASE_LP64D	= 0,
-  ABI_BASE_LP64F	= 1,
-  ABI_BASE_LP64S	= 2,
-  N_ABI_BASE_TYPES	= 3
-};
+#define ABI_BASE_LP64D	  0
+#define ABI_BASE_LP64F	  1
+#define ABI_BASE_LP64S	  2
+#define N_ABI_BASE_TYPES  3
 
 extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
   loongarch_abi_base_strings;
-- 
Gitee


From 0ef1c9545d83b7c7985e42cd611fe3de2761d86b Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 18 Mar 2026 14:38:42 +0800
Subject: [PATCH 07/31] LoongArch: Define hook
 TARGET_COMPUTE_PRESSURE_CLASSES[PR120476].

The rtx cost value defined by the target backend affects the
calculation of register pressure classes in the IRA, thus affecting
scheduling.  This may cause program performance degradation.
For example, OpenSSL 3.5.1 SHA512 and SPEC CPU 2017 exchange_r.

This problem can be avoided by defining a set of register pressure
classes in the target backend instead of using the default IRA to
automatically calculate them.
---
 gcc/config/loongarch/loongarch.cc | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 883db986aae..8f7ad207efc 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -11147,6 +11147,18 @@ loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_i
   return default_noce_conversion_profitable_p (seq, if_info);
 }
 
+/* Implement TARGET_COMPUTE_PRESSURE_CLASSES.  */
+
+static int
+loongarch_compute_pressure_classes (reg_class *classes)
+{
+  int i = 0;
+  classes[i++] = GENERAL_REGS;
+  classes[i++] = FP_REGS;
+  classes[i++] = FCC_REGS;
+  return i;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11421,6 +11433,9 @@ loongarch_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_i
 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
 #define TARGET_NOCE_CONVERSION_PROFITABLE_P loongarch_noce_conversion_profitable_p
 
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-loongarch.h"
-- 
Gitee


From 5de5d2fb3aedf487764588c1de60fde61a91a47e Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 14 Aug 2025 11:59:53 +0800
Subject: [PATCH 08/31] LoongArch: Fix ICE caused by function
 add_stmt_cost[PR121542].

	PR target/121542

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_vector_costs::add_stmt_cost): When using vectype,
	first determine whether it is NULL.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/pr121542.c: New test.
---
 gcc/config/loongarch/loongarch.cc             |  1 +
 gcc/testsuite/gcc.target/loongarch/pr121542.c | 54 +++++++++++++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121542.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 8f7ad207efc..929fe72e5c9 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4380,6 +4380,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	    break;
 	  }
       else if (TARGET_RECIP_VEC_DIV
+	       && vectype
 	       && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
 	{
 	  machine_mode mode = TYPE_MODE (vectype);
diff --git a/gcc/testsuite/gcc.target/loongarch/pr121542.c b/gcc/testsuite/gcc.target/loongarch/pr121542.c
new file mode 100644
index 00000000000..51a5e3c4480
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr121542.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-mrecip=all  -mfrecipe -mabi=lp64d -march=loongarch64 -mfpu=64 -msimd=lasx -Ofast" } */
+
+typedef long unsigned int STRLEN;
+typedef struct sv SV;
+struct sv
+{
+  void *sv_any;
+  unsigned int sv_refcnt;
+  unsigned int sv_flags;
+};
+typedef struct xpv XPV;
+struct xpv
+{
+  char *xpv_pv;
+  STRLEN xpv_cur;
+  STRLEN xpv_len;
+};
+typedef unsigned long UV;
+extern char *PL_bufend;
+extern char *d;
+SV *Perl_newSV (STRLEN len);
+
+char *
+S_scan_const (char *start)
+{
+  register char *send = PL_bufend;
+  SV *sv = Perl_newSV (send - start);
+  register char *s = start;
+  UV uv;
+
+  while (s < send)
+    {
+      if (!(((UV)(uv)) < 0x80))
+        {
+          int hicount = 0;
+          unsigned char *c;
+          for (c = (unsigned char *)((XPV *)(sv)->sv_any)->xpv_pv;
+               c < (unsigned char *)d; c++)
+            {
+              if (!(((UV)(*c)) < 0x80))
+                {
+                  hicount++;
+                }
+            }
+          d += hicount;
+          *d++ = (char)uv;
+        }
+
+      s++;
+    }
+
+  return s;
+}
-- 
Gitee


From f0ba5dcedebdf83eb9e0f74dc14211e3b1b55951 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:41 +0800
Subject: [PATCH 09/31] LoongArch: (NFC) Remove atomic_optab and use amop
 instead

They are the same.

gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_optab): Remove.
	(atomic_<atomic_optab><mode>): Change atomic_optab to amop.
	(atomic_fetch_<atomic_optab><mode>): Likewise.
---
 gcc/config/loongarch/sync.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 2e008c4874e..92962ba6429 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -35,8 +35,6 @@
 ])
 
 (define_code_iterator any_atomic [plus ior xor and])
-(define_code_attr atomic_optab
-  [(plus "add") (ior "or") (xor "xor") (and "and")])
 
 ;; This attribute gives the format suffix for atomic memory operations.
 (define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
@@ -175,7 +173,7 @@
 }
   [(set (attr "length") (const_int 12))])
 
-(define_insn "atomic_<atomic_optab><mode>"
+(define_insn "atomic_<amop><mode>"
   [(set (match_operand:GPR 0 "memory_operand" "+ZB")
 	(unspec_volatile:GPR
 	  [(any_atomic:GPR (match_dup 0)
@@ -197,7 +195,7 @@
   "amadd%A2.<amo>\t$zero,%z1,%0"
   [(set (attr "length") (const_int 4))])
 
-(define_insn "atomic_fetch_<atomic_optab><mode>"
+(define_insn "atomic_fetch_<amop><mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 	(match_operand:GPR 1 "memory_operand" "+ZB"))
    (set (match_dup 1)
-- 
Gitee


From 2a071039c38de737cdfe0d3c13dafe1e4accfa79 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:42 +0800
Subject: [PATCH 10/31] LoongArch: (NFC) Remove amo and use size instead

They are the same.

gcc/ChangeLog:

	* config/loongarch/sync.md: Use <size> instead of <amo>.
	(amo): Remove.
---
 gcc/config/loongarch/sync.md | 53 +++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 28 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 92962ba6429..5fdb149ee46 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -36,9 +36,6 @@
 
 (define_code_iterator any_atomic [plus ior xor and])
 
-;; This attribute gives the format suffix for atomic memory operations.
-(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
-
 ;; <amop> expands to the name of the atomic operand that implements a
 ;; particular code.
 (define_code_attr amop [(ior "or") (xor "xor") (and "and") (plus "add")])
@@ -181,7 +178,7 @@
 	   (match_operand:SI 2 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
   ""
-  "am<amop>%A2.<amo>\t$zero,%z1,%0"
+  "am<amop>%A2.<size>\t$zero,%z1,%0"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_add<mode>"
@@ -192,7 +189,7 @@
 	   (match_operand:SI 2 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
   "ISA_HAS_LAM_BH"
-  "amadd%A2.<amo>\t$zero,%z1,%0"
+  "amadd%A2.<size>\t$zero,%z1,%0"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_fetch_<amop><mode>"
@@ -205,7 +202,7 @@
 	   (match_operand:SI 3 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
   ""
-  "am<amop>%A3.<amo>\t%0,%z2,%1"
+  "am<amop>%A3.<size>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_exchange<mode>"
@@ -217,7 +214,7 @@
    (set (match_dup 1)
 	(match_operand:GPR 2 "register_operand" "r"))]
   ""
-  "amswap%A3.<amo>\t%0,%z2,%1"
+  "amswap%A3.<size>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_exchange<mode>_short"
@@ -229,7 +226,7 @@
    (set (match_dup 1)
 	(match_operand:SHORT 2 "register_operand" "r"))]
   "ISA_HAS_LAM_BH"
-  "amswap%A3.<amo>\t%0,%z2,%1"
+  "amswap%A3.<size>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
 (define_insn "atomic_cas_value_strong<mode>"
@@ -244,7 +241,7 @@
   ""
 {
   output_asm_insn ("1:", operands);
-  output_asm_insn ("ll.<amo>\t%0,%1", operands);
+  output_asm_insn ("ll.<size>\t%0,%1", operands);
 
   /* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the
      return value of the val_without_const_folding will not be truncated and
@@ -264,7 +261,7 @@
     output_asm_insn ("bne\t%0,%z2,2f", operands);
 
   output_asm_insn ("or%i3\t%5,$zero,%3", operands);
-  output_asm_insn ("sc.<amo>\t%5,%1", operands);
+  output_asm_insn ("sc.<size>\t%5,%1", operands);
   output_asm_insn ("beqz\t%5,1b", operands);
   output_asm_insn ("b\t3f", operands);
   output_asm_insn ("2:", operands);
@@ -289,7 +286,7 @@
 			       (match_operand:SI 4 "const_int_operand")]  ;; mod_s
 	 UNSPEC_COMPARE_AND_SWAP))]
   "ISA_HAS_LAMCAS"
-  "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
+  "ori\t%0,%z2,0\n\tamcas%A4.<size>\t%0,%z3,%1"
   [(set (attr "length") (const_int 8))])
 
 (define_expand "atomic_compare_and_swap<mode>"
@@ -404,12 +401,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%2\\n\\t"
 	 "bne\\t%7,%z4,2f\\n\\t"
 	 "and\\t%7,%0,%z3\\n\\t"
 	 "or%i5\\t%7,%7,%5\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b\\n\\t"
 	 "b\\t3f\\n\\t"
 	 "2:\\n\\t"
@@ -494,12 +491,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "add.w\\t%8,%0,%z5\\n\\t"
 	 "and\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 
@@ -520,12 +517,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "sub.w\\t%8,%0,%z5\\n\\t"
 	 "and\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
   [(set (attr "length") (const_int 28))])
@@ -545,12 +542,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "and\\t%8,%0,%z5\\n\\t"
 	 "and\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
   [(set (attr "length") (const_int 28))])
@@ -570,12 +567,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "xor\\t%8,%0,%z5\\n\\t"
 	 "and\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 
@@ -596,12 +593,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "or\\t%8,%0,%z5\\n\\t"
 	 "and\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 
@@ -622,12 +619,12 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "and\\t%8,%0,%z5\\n\\t"
 	 "xor\\t%8,%8,%z2\\n\\t"
 	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
   [(set (attr "length") (const_int 28))])
@@ -646,10 +643,10 @@
   ""
 {
   return "1:\\n\\t"
-	 "ll.<amo>\\t%0,%1\\n\\t"
+	 "ll.<size>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%z3\\n\\t"
 	 "or%i5\\t%7,%7,%5\\n\\t"
-	 "sc.<amo>\\t%7,%1\\n\\t"
+	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beqz\\t%7,1b\\n\\t";
 }
   [(set (attr "length") (const_int 20))])
@@ -686,7 +683,7 @@
 	   (match_operand:SI 3 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
   "ISA_HAS_LAM_BH"
-  "amadd%A3.<amo>\t%0,%z2,%1"
+  "amadd%A3.<size>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
 
 (define_expand "atomic_fetch_add<mode>"
-- 
Gitee


From 1ac0e9b8e0f6123a82ec282f86955b954d9ef72c Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:43 +0800
Subject: [PATCH 11/31] LoongArch: Don't use "+" for atomic_{load, store} "m"
 constraint

Atomic load does not modify the memory.  Atomic store does not read the
memory, thus we can use "=" instead.

gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_load<mode>): Remove "+" for
	the memory operand.
	(atomic_store<mode>): Use "=" instead of "+" for the memory
	operand.
---
 gcc/config/loongarch/sync.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 5fdb149ee46..ef1b9118d64 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -102,7 +102,7 @@
 (define_insn "atomic_load<mode>"
   [(set (match_operand:QHWD 0 "register_operand" "=r")
     (unspec_volatile:QHWD
-      [(match_operand:QHWD 1 "memory_operand" "+m")
+      [(match_operand:QHWD 1 "memory_operand" "m")
        (match_operand:SI 2 "const_int_operand")]                        ;; model
       UNSPEC_ATOMIC_LOAD))]
   ""
@@ -139,7 +139,7 @@
 
 ;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
 (define_insn "atomic_store<mode>"
-  [(set (match_operand:QHWD 0 "memory_operand" "+m")
+  [(set (match_operand:QHWD 0 "memory_operand" "=m")
     (unspec_volatile:QHWD
       [(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
        (match_operand:SI 2 "const_int_operand")]      ;; model
-- 
Gitee


From 800a3913d2b0c75e498aa1fd2a277695cdd90bd6 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:44 +0800
Subject: [PATCH 12/31] LoongArch: Allow using bstrins for masking the address
 in atomic_test_and_set

We can use bstrins for masking the address here.  As people are already
working on LA32R (which lacks bstrins instructions), for future-proofing
we check whether (const_int -4) is an and_operand and force it into an
register if not.

gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_test_and_set): Use bstrins
	for masking the address if possible.
---
 gcc/config/loongarch/sync.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index ef1b9118d64..affab1f6fcf 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -356,12 +356,13 @@
   rtx mem = operands[1];
   rtx model = operands[2];
   rtx addr = force_reg (Pmode, XEXP (mem, 0));
-  rtx tmp_reg = gen_reg_rtx (Pmode);
-  rtx zero_reg = gen_rtx_REG (Pmode, 0);
-
+  rtx mask = gen_int_mode (-4, Pmode);
   rtx aligned_addr = gen_reg_rtx (Pmode);
-  emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4)));
-  emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg));
+
+  if (!and_operand (mask, Pmode))
+    mask = force_reg (Pmode, mask);
+
+  emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask));
 
   rtx aligned_mem = change_address (mem, SImode, aligned_addr);
   set_mem_alias_set (aligned_mem, 0);
-- 
Gitee


From 78e8063efb4764085887598c9ee56043c1bde7c0 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:45 +0800
Subject: [PATCH 13/31] LoongArch: Don't emit overly-restrictive barrier for
 LL-SC loops

For LL-SC loops, if the atomic operation has succeeded, the SC
instruction always imply a full barrier, so the barrier we manually
inserted only needs to take the account for the failure memorder, not
the success memorder (the barrier is skipped with "b 3f" on success
anyway).

Note that if we use the AMCAS instructions, we indeed need to consider
both the success memorder an the failure memorder deciding if "_db"
suffix is needed.  Thus the semantics of atomic_cas_value_strong<mode>
and atomic_cas_value_strong<mode>_amcas start to be different.  To
prevent the compiler from being too clever, use a different unspec code
for AMCAS instructions.

gcc/ChangeLog:

	* config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AMCAS): New
	UNSPEC code.
	(atomic_cas_value_strong<mode>): NFC, update the comment to note
	we only need to consider failure memory order.
	(atomic_cas_value_strong<mode>_amcas): Use
	UNSPEC_COMPARE_AND_SWAP_AMCAS instead of
	UNSPEC_COMPARE_AND_SWAP.
	(atomic_compare_and_swap<mode:GPR>): Pass failure memorder to
	gen_atomic_cas_value_strong<mode>.
	(atomic_compare_and_swap<mode:SHORT>): Pass failure memorder to
	gen_atomic_cas_value_cmp_and_7_si.
---
 gcc/config/loongarch/sync.md | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index affab1f6fcf..f19ad6af4d3 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -21,6 +21,7 @@
 
 (define_c_enum "unspec" [
   UNSPEC_COMPARE_AND_SWAP
+  UNSPEC_COMPARE_AND_SWAP_AMCAS
   UNSPEC_COMPARE_AND_SWAP_ADD
   UNSPEC_COMPARE_AND_SWAP_SUB
   UNSPEC_COMPARE_AND_SWAP_AND
@@ -235,7 +236,7 @@
    (set (match_dup 1)
 	(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
 			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")
-			      (match_operand:SI 4 "const_int_operand")]  ;; mod_s
+			      (match_operand:SI 4 "const_int_operand")]  ;; mod_f
 	 UNSPEC_COMPARE_AND_SWAP))
    (clobber (match_scratch:GPR 5 "=&r"))]
   ""
@@ -283,8 +284,8 @@
    (set (match_dup 1)
 	(unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
 			       (match_operand:QHWD 3 "reg_or_0_operand" "rJ")
-			       (match_operand:SI 4 "const_int_operand")]  ;; mod_s
-	 UNSPEC_COMPARE_AND_SWAP))]
+			       (match_operand:SI 4 "const_int_operand")]  ;; mod
+	 UNSPEC_COMPARE_AND_SWAP_AMCAS))]
   "ISA_HAS_LAMCAS"
   "ori\t%0,%z2,0\n\tamcas%A4.<size>\t%0,%z3,%1"
   [(set (attr "length") (const_int 8))])
@@ -313,16 +314,14 @@
       && is_mm_release (memmodel_base (INTVAL (mod_s))))
     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
 
-  operands[6] = mod_s;
-
   if (ISA_HAS_LAMCAS)
     emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 							 operands[3], operands[4],
-							 operands[6]));
+							 mod_s));
   else
     emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
 						  operands[3], operands[4],
-						  operands[6]));
+						  mod_f));
 
   rtx compare = operands[1];
   if (operands[3] != const0_rtx)
@@ -396,7 +395,7 @@
 			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")
 			      (match_operand:GPR 4 "reg_or_0_operand"  "rJ")
 			      (match_operand:GPR 5 "reg_or_0_operand"  "rJ")
-			      (match_operand:SI 6 "const_int_operand")] ;; model
+			      (match_operand:SI 6 "const_int_operand")] ;; mod_f
 	 UNSPEC_COMPARE_AND_SWAP))
    (clobber (match_scratch:GPR 7 "=&r"))]
   ""
@@ -440,18 +439,16 @@
       && is_mm_release (memmodel_base (INTVAL (mod_s))))
     mod_s = GEN_INT (MEMMODEL_ACQ_REL);
 
-  operands[6] = mod_s;
-
   if (ISA_HAS_LAMCAS)
     emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 						       operands[3], operands[4],
-						       operands[6]));
+						       mod_s));
   else
     {
       union loongarch_gen_fn_ptrs generator;
       generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
       loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
-				    operands[3], operands[4], operands[6]);
+				    operands[3], operands[4], mod_f);
     }
 
       rtx compare = operands[1];
-- 
Gitee


From e0c009c7f2155ad1a316c3162aace5c369e6d4c0 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:46 +0800
Subject: [PATCH 14/31] LoongArch: Remove unneeded "b 3f" instruction after
 LL-SC loops

This instruction is used to skip an redundant barrier if -mno-ld-seq-sa
or the memory model requires a barrier on failure.  But with -mld-seq-sa
and other memory models the barrier may be nonexisting at all, and we
should remove the "b 3f" instruction as well.

The implementation uses a new operand modifier "%T" to output a comment
marker if the operand is a memory order for which the barrier won't be
generated.  "%T", and also "%t", are not really used before and the code
for them in loongarch_print_operand_reloc is just some MIPS legacy.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
	Make "%T" output a comment marker if the operand is a memory
	order for which the barrier won't be generated; remove "%t".
	* config/loongarch/sync.md (atomic_cas_value_strong<mode>): Add
	%T before "b 3f".
	(atomic_cas_value_cmp_and_7_<mode>): Likewise.
---
 gcc/config/loongarch/loongarch.cc | 19 ++++++++-----------
 gcc/config/loongarch/sync.md      |  4 ++--
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 929fe72e5c9..4690c2a2d96 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6234,9 +6234,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
    'Q'  Print R_LARCH_RELAX for TLS IE.
    'r'  Print address 12-31bit relocation associated with OP.
    'R'  Print address 32-51bit relocation associated with OP.
-   'T'	Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
-	      'z' for (eq:?I ...), 'n' for (ne:?I ...).
-   't'	Like 'T', but with the EQ/NE cases reversed
+   'T'	Print a comment marker if %G outputs nothing.
    'u'	Print a LASX register.
    'v'	Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
 	  V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
@@ -6315,6 +6313,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
 	fputs ("dbar\t0x700", file);
       break;
 
+    case 'T':
+      if (!loongarch_cas_failure_memorder_needs_acquire (
+	    memmodel_from_int (INTVAL (op)))
+	  && ISA_HAS_LD_SEQ_SA)
+	fprintf (file, "%s", ASM_COMMENT_START);
+      break;
+
     case 'h':
       if (code == HIGH)
 	op = XEXP (op, 0);
@@ -6393,14 +6398,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
 				     false /* lo_reloc */);
       break;
 
-    case 't':
-    case 'T':
-      {
-	int truth = (code == NE) == (letter == 'T');
-	fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
-      }
-      break;
-
     case 'V':
       if (CONST_VECTOR_P (op))
 	{
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index f19ad6af4d3..89a02eebf85 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -264,7 +264,7 @@
   output_asm_insn ("or%i3\t%5,$zero,%3", operands);
   output_asm_insn ("sc.<size>\t%5,%1", operands);
   output_asm_insn ("beqz\t%5,1b", operands);
-  output_asm_insn ("b\t3f", operands);
+  output_asm_insn ("%T4b\t3f", operands);
   output_asm_insn ("2:", operands);
   output_asm_insn ("%G4", operands);
   output_asm_insn ("3:", operands);
@@ -408,7 +408,7 @@
 	 "or%i5\\t%7,%7,%5\\n\\t"
 	 "sc.<size>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b\\n\\t"
-	 "b\\t3f\\n\\t"
+	 "%T6b\\t3f\\n\\t"
 	 "2:\\n\\t"
 	 "%G6\\n\\t"
 	 "3:\\n\\t";
-- 
Gitee


From 528d525e607367e924a6f9d04606e162136d2c93 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:47 +0800
Subject: [PATCH 15/31] LoongArch: Remove unneeded "andi offset, addr, 3"
 instruction in atomic_test_and_set

On LoongArch sll.w and srl.w instructions only take the [4:0] bits of
rk (shift amount) into account, and we've already defined
SHIFT_COUNT_TRUNCATED to 1 so the compiler knows this fact, thus we
don't need this instruction.

gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_test_and_set): Remove
	unneeded andi instruction from the expansion.
---
 gcc/config/loongarch/sync.md | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 89a02eebf85..0c1ece987e2 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -366,15 +366,14 @@
   rtx aligned_mem = change_address (mem, SImode, aligned_addr);
   set_mem_alias_set (aligned_mem, 0);
 
-  rtx offset = gen_reg_rtx (SImode);
-  emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
-				       GEN_INT (3)));
-
   rtx tmp = gen_reg_rtx (SImode);
   emit_move_insn (tmp, GEN_INT (1));
 
+  /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need
+     to mask addr with 0b11 here.  */
   rtx shmt = gen_reg_rtx (SImode);
-  emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+  emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr),
+					GEN_INT (3)));
 
   rtx word = gen_reg_rtx (SImode);
   emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt));
-- 
Gitee


From a0c1f4c3092590980b8ec33fa266be90f76dfa42 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:48 +0800
Subject: [PATCH 16/31] LoongArch: Implement subword atomic_fetch_{and, or,
 xor} with am*.w instructions

We can just shift the mask and fill the other bits with 0 (for ior/xor)
or 1 (for and), and use an am*.w instruction to perform the atomic
operation, instead of using a LL-SC loop.

gcc/ChangeLog:

	* config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AND):
	Remove.
	(UNSPEC_COMPARE_AND_SWAP_XOR): Remove.
	(UNSPEC_COMPARE_AND_SWAP_OR): Remove.
	(atomic_test_and_set): Rename to ...
	(atomic_fetch_<any_bitwise:amop><SHORT:mode>): ... this, and
	adapt the expansion to use it for any bitwise operations and any
	val, instead of just ior 1.
	(atomic_test_and_set): New define_expand.
---
 gcc/config/loongarch/sync.md | 177 +++++++----------------------------
 1 file changed, 34 insertions(+), 143 deletions(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 0c1ece987e2..14bf480f011 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -24,9 +24,6 @@
   UNSPEC_COMPARE_AND_SWAP_AMCAS
   UNSPEC_COMPARE_AND_SWAP_ADD
   UNSPEC_COMPARE_AND_SWAP_SUB
-  UNSPEC_COMPARE_AND_SWAP_AND
-  UNSPEC_COMPARE_AND_SWAP_XOR
-  UNSPEC_COMPARE_AND_SWAP_OR
   UNSPEC_COMPARE_AND_SWAP_NAND
   UNSPEC_SYNC_OLD_OP
   UNSPEC_SYNC_EXCHANGE
@@ -343,17 +340,18 @@
   DONE;
 })
 
-(define_expand "atomic_test_and_set"
-  [(match_operand:QI 0 "register_operand" "")     ;; bool output
-   (match_operand:QI 1 "memory_operand" "+ZB")    ;; memory
-   (match_operand:SI 2 "const_int_operand" "")]   ;; model
+(define_expand "atomic_fetch_<amop><mode>"
+  [(match_operand:SHORT 0 "register_operand" "")		 ;; output
+   (any_bitwise (match_operand:SHORT 1 "memory_operand"   "+ZB") ;; memory
+		(match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val
+   (match_operand:SI 3 "const_int_operand" "")]			 ;; model
   ""
 {
-  /* We have no QImode atomics, so use the address LSBs to form a mask,
-     then use an aligned SImode atomic.  */
+  /* We have no QI/HImode bitwise atomics, so use the address LSBs to form
+     a mask, then use an aligned SImode atomic.  */
   rtx result = operands[0];
   rtx mem = operands[1];
-  rtx model = operands[2];
+  rtx model = operands[3];
   rtx addr = force_reg (Pmode, XEXP (mem, 0));
   rtx mask = gen_int_mode (-4, Pmode);
   rtx aligned_addr = gen_reg_rtx (Pmode);
@@ -367,7 +365,8 @@
   set_mem_alias_set (aligned_mem, 0);
 
   rtx tmp = gen_reg_rtx (SImode);
-  emit_move_insn (tmp, GEN_INT (1));
+  emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode,
+					   operands[2], <MODE>mode));
 
   /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need
      to mask addr with 0b11 here.  */
@@ -378,14 +377,37 @@
   rtx word = gen_reg_rtx (SImode);
   emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt));
 
+  if (<is_and>)
+    {
+      /* word = word | ~(mode_mask << shmt) */
+      rtx tmp = force_reg (SImode,
+			   gen_int_mode (GET_MODE_MASK (<MODE>mode),
+					 SImode));
+      emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt));
+      emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp),
+					 word));
+    }
+
   tmp = gen_reg_rtx (SImode);
-  emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+  emit_insn (gen_atomic_fetch_<amop>si (tmp, aligned_mem, word, model));
 
   emit_move_insn (gen_lowpart (SImode, result),
 		  gen_rtx_LSHIFTRT (SImode, tmp, shmt));
   DONE;
 })
 
+(define_expand "atomic_test_and_set"
+  [(match_operand:QI 0 "register_operand" "")     ;; bool output
+   (match_operand:QI 1 "memory_operand" "+ZB")    ;; memory
+   (match_operand:SI 2 "const_int_operand" "")]   ;; model
+  ""
+{
+  rtx one = force_reg (QImode, gen_int_mode (1, QImode));
+  emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one,
+				    operands[2]));
+  DONE;
+})
+
 (define_insn "atomic_cas_value_cmp_and_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
 	(match_operand:GPR 1 "memory_operand" "+ZC"))
@@ -524,83 +546,6 @@
 }
   [(set (attr "length") (const_int 28))])
 
-(define_insn "atomic_cas_value_and_7_<mode>"
-  [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
-	(match_operand:GPR 1 "memory_operand" "+ZC"))
-   (set (match_dup 1)
-	(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")	;; mask
-			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")	;; inverted_mask
-			      (match_operand:GPR 4 "reg_or_0_operand"  "rJ")	;; old val
-			      (match_operand:GPR 5 "reg_or_0_operand"  "rJ")	;; new val
-			      (match_operand:SI 6 "const_int_operand")]		;; model
-	 UNSPEC_COMPARE_AND_SWAP_AND))
-   (clobber (match_scratch:GPR 7 "=&r"))
-   (clobber (match_scratch:GPR 8 "=&r"))]
-  ""
-{
-  return "1:\\n\\t"
-	 "ll.<size>\\t%0,%1\\n\\t"
-	 "and\\t%7,%0,%3\\n\\t"
-	 "and\\t%8,%0,%z5\\n\\t"
-	 "and\\t%8,%8,%z2\\n\\t"
-	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<size>\\t%7,%1\\n\\t"
-	 "beq\\t$zero,%7,1b";
-}
-  [(set (attr "length") (const_int 28))])
-
-(define_insn "atomic_cas_value_xor_7_<mode>"
-  [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
-	(match_operand:GPR 1 "memory_operand" "+ZC"))
-   (set (match_dup 1)
-	(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")	;; mask
-			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")	;; inverted_mask
-			      (match_operand:GPR 4 "reg_or_0_operand"  "rJ")	;; old val
-			      (match_operand:GPR 5 "reg_or_0_operand"  "rJ")	;; new val
-			      (match_operand:SI 6 "const_int_operand")]		;; model
-	 UNSPEC_COMPARE_AND_SWAP_XOR))
-   (clobber (match_scratch:GPR 7 "=&r"))
-   (clobber (match_scratch:GPR 8 "=&r"))]
-  ""
-{
-  return "1:\\n\\t"
-	 "ll.<size>\\t%0,%1\\n\\t"
-	 "and\\t%7,%0,%3\\n\\t"
-	 "xor\\t%8,%0,%z5\\n\\t"
-	 "and\\t%8,%8,%z2\\n\\t"
-	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<size>\\t%7,%1\\n\\t"
-	 "beq\\t$zero,%7,1b";
-}
-
-  [(set (attr "length") (const_int 28))])
-
-(define_insn "atomic_cas_value_or_7_<mode>"
-  [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
-	(match_operand:GPR 1 "memory_operand" "+ZC"))
-   (set (match_dup 1)
-	(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")	;; mask
-			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")	;; inverted_mask
-			      (match_operand:GPR 4 "reg_or_0_operand"  "rJ")	;; old val
-			      (match_operand:GPR 5 "reg_or_0_operand"  "rJ")	;; new val
-			      (match_operand:SI 6 "const_int_operand")]		;; model
-	 UNSPEC_COMPARE_AND_SWAP_OR))
-   (clobber (match_scratch:GPR 7 "=&r"))
-   (clobber (match_scratch:GPR 8 "=&r"))]
-  ""
-{
-  return "1:\\n\\t"
-	 "ll.<size>\\t%0,%1\\n\\t"
-	 "and\\t%7,%0,%3\\n\\t"
-	 "or\\t%8,%0,%z5\\n\\t"
-	 "and\\t%8,%8,%z2\\n\\t"
-	 "or%i8\\t%7,%7,%8\\n\\t"
-	 "sc.<size>\\t%7,%1\\n\\t"
-	 "beq\\t$zero,%7,1b";
-}
-
-  [(set (attr "length") (const_int 28))])
-
 (define_insn "atomic_cas_value_nand_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
 	(match_operand:GPR 1 "memory_operand" "+ZC"))
@@ -725,60 +670,6 @@
   DONE;
 })
 
-(define_expand "atomic_fetch_and<mode>"
-  [(set (match_operand:SHORT 0 "register_operand" "=&r")
-	(match_operand:SHORT 1 "memory_operand" "+ZB"))
-   (set (match_dup 1)
-	(unspec_volatile:SHORT
-	  [(and:SHORT (match_dup 1)
-		      (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
-	   (match_operand:SI 3 "const_int_operand")] ;; model
-	 UNSPEC_SYNC_OLD_OP))]
-  ""
-{
-  union loongarch_gen_fn_ptrs generator;
-  generator.fn_7 = gen_atomic_cas_value_and_7_si;
-  loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
-				operands[1], operands[2], operands[3]);
-  DONE;
-})
-
-(define_expand "atomic_fetch_xor<mode>"
-  [(set (match_operand:SHORT 0 "register_operand" "=&r")
-	(match_operand:SHORT 1 "memory_operand" "+ZB"))
-   (set (match_dup 1)
-	(unspec_volatile:SHORT
-	  [(xor:SHORT (match_dup 1)
-		      (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
-	   (match_operand:SI 3 "const_int_operand")] ;; model
-	 UNSPEC_SYNC_OLD_OP))]
-  ""
-{
-  union loongarch_gen_fn_ptrs generator;
-  generator.fn_7 = gen_atomic_cas_value_xor_7_si;
-  loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
-				operands[1], operands[2], operands[3]);
-  DONE;
-})
-
-(define_expand "atomic_fetch_or<mode>"
-  [(set (match_operand:SHORT 0 "register_operand" "=&r")
-	(match_operand:SHORT 1 "memory_operand" "+ZB"))
-   (set (match_dup 1)
-	(unspec_volatile:SHORT
-	  [(ior:SHORT (match_dup 1)
-		      (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
-	   (match_operand:SI 3 "const_int_operand")] ;; model
-	 UNSPEC_SYNC_OLD_OP))]
-  ""
-{
-  union loongarch_gen_fn_ptrs generator;
-  generator.fn_7 = gen_atomic_cas_value_or_7_si;
-  loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
-				operands[1], operands[2], operands[3]);
-  DONE;
-})
-
 (define_expand "atomic_fetch_nand<mode>"
   [(set (match_operand:SHORT 0 "register_operand" "=&r")
 	(match_operand:SHORT 1 "memory_operand" "+ZB"))
-- 
Gitee


From 023494d35fd6565d3aa26bf41fb751f1a1012f27 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 1 Mar 2025 11:46:49 +0800
Subject: [PATCH 17/31] LoongArch: Don't expand atomic_fetch_sub_{hi, qi} to
 LL-SC loop if -mlam-bh

With -mlam-bh, we should negate the addend first, and use an amadd
instruction.  Disabling the expander makes the compiler do it correctly.

gcc/ChangeLog:

	* config/loongarch/sync.md (atomic_fetch_sub<SHORT:mode>):
	Disable if ISA_HAS_LAM_BH.
---
 gcc/config/loongarch/sync.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 14bf480f011..ad43c58d016 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -661,7 +661,7 @@
 			(match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
 	   (match_operand:SI 3 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
-  ""
+  "!ISA_HAS_LAM_BH"
 {
   union loongarch_gen_fn_ptrs generator;
   generator.fn_7 = gen_atomic_cas_value_sub_7_si;
-- 
Gitee


From 42e47e3263dbb1508ea5e80216d20c25f2738874 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 12 Sep 2025 15:57:08 +0800
Subject: [PATCH 18/31] LoongArch: Fix wrong code from bstrpick split

After late-combine is added, split1 can see an input like

    (insn 56 55 169 5
      (set (reg/v:DI 87 [ n ])
        (ior:DI (and:DI (reg/v:DI 87 [ n ])
                        (const_int 281474976710655 [0xffffffffffff]))
                (and:DI (reg:DI 131 [ _45 ])
                        (const_int -281474976710656 [0xffff000000000000]))))
      "pr121906.c":22:8 108 {*bstrins_di_for_ior_mask}
      (nil))

And the splitter ends up emitting

    (insn 184 55 185 5
      (set (reg/v:DI 87 [ n ])
           (reg:DI 131 [ _45 ]))
      "pr121906.c":22:8 -1
      (nil))
    (insn 185 184 169 5
      (set (zero_extract:DI (reg/v:DI 87 [ n ])
                            (const_int 48 [0x30])
                            (const_int 0 [0]))
           (reg/v:DI 87 [ n ]))
      "pr121906.c":22:8 -1
      (nil))

which obviously lost everything in r87, instead of retaining its lower
bits as we expect.  It's because the splitter didn't anticipate the
output register may be one of the input registers.

	PR target/121906

gcc/

	* config/loongarch/loongarch.md (*bstrins_<mode>_for_ior_mask):
	Always create a new pseudo for the input register of the bstrins
	instruction.

gcc/testsuite/

	* gcc.target/loongarch/pr121906.c: New test.
---
 gcc/config/loongarch/loongarch.md             | 14 ++++-----
 gcc/testsuite/gcc.target/loongarch/pr121906.c | 31 +++++++++++++++++++
 2 files changed, 38 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr121906.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index f5181093a53..155390fb2d7 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1630,13 +1630,13 @@
     operands[2] = GEN_INT (len);
     operands[4] = GEN_INT (lo);
 
-    if (lo)
-      {
-	rtx tmp = gen_reg_rtx (<MODE>mode);
-	emit_move_insn (tmp, gen_rtx_ASHIFTRT(<MODE>mode, operands[3],
-					      GEN_INT (lo)));
-	operands[3] = tmp;
-      }
+    /* Use a new pseudo register even if lo == 0 or we'll wreck havoc
+       when operands[0] is same as operands[3].  See PR 121906.  */
+    rtx tmp = gen_reg_rtx (<MODE>mode);
+    rtx val = lo ? gen_rtx_ASHIFTRT (<MODE>mode, operands[3], GEN_INT (lo))
+		 : operands[3];
+    emit_move_insn (tmp, val);
+    operands[3] = tmp;
   })
 
 ;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
diff --git a/gcc/testsuite/gcc.target/loongarch/pr121906.c b/gcc/testsuite/gcc.target/loongarch/pr121906.c
new file mode 100644
index 00000000000..b4fde5f0c85
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr121906.c
@@ -0,0 +1,31 @@
+/* PR target/121906 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mno-lsx" } */
+
+typedef unsigned short u16;
+typedef unsigned long u64;
+typedef u16 v4hi __attribute__ ((vector_size (8)));
+typedef u16 v8hi __attribute__ ((vector_size (16)));
+
+u64 d;
+int e, i;
+u16 x;
+
+int
+main ()
+{
+  v4hi n = { 1 };
+  u64 *o = &d;
+p:
+  asm goto ("" : : : : q);
+  n[3] = (-(v8hi){ 0, 0, 0, 0, x })[7];
+  for (; e >= 0; e--)
+    {
+      *o = n[0];
+      if (i)
+        goto p;
+    q:
+    }
+  if (d != 1)
+    __builtin_trap ();
+}
-- 
Gitee


From ca8eb688a5d676c744aa6e864cdf16df9c9e00a7 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 26 Oct 2025 13:20:20 +0800
Subject: [PATCH 19/31] LoongArch: Make the code generation of the trap pattern
 configurable

In some applications (notably the Linux kernel), "break 0" is used as a
trap that a handler may be able to recover.  But in GCC the "trap"
pattern is meant to make the program rightfully die instead.

As [1] describes, sometimes it's vital to distinguish between the two
cases.  The kernel developers prefer "break 1" here, but in the
user-space it's better to trigger a SIGILL instead of SIGTRAP as the
latter is more likely used as a application-defined trap.

To support both cases, make the code generation configurable with a new
option.

[1]:https://lore.kernel.org/20250923061722.24457-1-yangtiezhu@loongson.cn

gcc/

	* config/loongarch/genopts/loongarch.opt.in (-mbreak-code=):
	New.
	* config/loongarch/loongarch.opt: Regenerate.
	* config/loongarch/loongarch.md (trap): Separate to a
	define_insn and a define_expand which takes la_break_code.
	* doc/invoke.texi (-mbreak-code=): Document.
	* config/loongarch/loongarch.opt.urls: Regenerate.

gcc/testsuite

	* gcc.target/loongarch/trap-default.c: New test.
	* gcc.target/loongarch/trap-1.c: New test.
---
 gcc/config/loongarch/genopts/loongarch.opt.in     |  4 ++++
 gcc/config/loongarch/loongarch.md                 | 14 +++++++++++---
 gcc/config/loongarch/loongarch.opt                |  4 ++++
 gcc/config/loongarch/loongarch.opt.urls           |  3 +++
 gcc/testsuite/gcc.target/loongarch/trap-1.c       |  9 +++++++++
 gcc/testsuite/gcc.target/loongarch/trap-default.c |  9 +++++++++
 6 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/trap-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/trap-default.c

diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
index 4ffd969e350..9739279a549 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -205,6 +205,10 @@ mmax-inline-memcpy-size=
 Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 
+mbreak-code=
+Target Joined UInteger Var(la_break_code) Init(-1) Save
+-mbreak-code=CODE	Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range.
+
 Enum
 Name(explicit_relocs) Type(int)
 The code model option names for -mexplicit-relocs:
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 155390fb2d7..fd59ec9225e 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -689,14 +689,22 @@
 ;;  ....................
 ;;
 
-(define_insn "trap"
-  [(trap_if (const_int 1) (const_int 0))]
+(define_insn "*trap"
+  [(trap_if (const_int 1) (match_operand 0 "const_int_operand"))]
   ""
 {
-  return "break\t0";
+  return (const_uimm15_operand (operands[0], VOIDmode)
+	  ? "break\t%0"
+	  : "amswap.w\t$r0,$r1,$r0");
 }
   [(set_attr "type" "trap")])
 
+(define_expand "trap"
+  [(trap_if (const_int 1) (match_dup 0))]
+  ""
+{
+  operands[0] = GEN_INT (la_break_code);
+})
 
 
 ;;
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 52469757fc6..55af42c9e77 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -213,6 +213,10 @@ mmax-inline-memcpy-size=
 Target Joined RejectNegative UInteger Var(la_max_inline_memcpy_size) Init(1024) Save
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 
+mbreak-code=
+Target Joined UInteger Var(la_break_code) Init(-1) Save
+-mbreak-code=CODE	Use 'break CODE' for traps supposed to be unrecoverable, or an 'amswap.w' instruction leading to INE if CODE is out of range.
+
 Enum
 Name(explicit_relocs) Type(int)
 The code model option names for -mexplicit-relocs:
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index 5f644f6c315..7f02d1174ff 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -48,6 +48,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mstrict-align-1)
 mmax-inline-memcpy-size=
 UrlSuffix(gcc/LoongArch-Options.html#index-mmax-inline-memcpy-size)
 
+mbreak-code=
+UrlSuffix(gcc/LoongArch-Options.html#index-mbreak-code)
+
 mexplicit-relocs=
 UrlSuffix(gcc/LoongArch-Options.html#index-mexplicit-relocs-1)
 
diff --git a/gcc/testsuite/gcc.target/loongarch/trap-1.c b/gcc/testsuite/gcc.target/loongarch/trap-1.c
new file mode 100644
index 00000000000..8936f60cce2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/trap-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -w -fisolate-erroneous-paths-dereference -mbreak-code=1" } */
+/* { dg-final { scan-assembler "break\\t1" } } */
+
+int
+bug (void)
+{
+  return *(int *)0;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/trap-default.c b/gcc/testsuite/gcc.target/loongarch/trap-default.c
new file mode 100644
index 00000000000..32948d4c822
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/trap-default.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -w -fisolate-erroneous-paths-dereference" } */
+/* { dg-final { scan-assembler "amswap\\.w\\t\\\$r0,\\\$r1,\\\$r0" } } */
+
+int
+bug (void)
+{
+  return *(int *)0;
+}
-- 
Gitee


From 7d443f4ce1e459b1ef72079dbda045353c527b0b Mon Sep 17 00:00:00 2001
From: Guo Jie <guojie@loongson.cn>
Date: Sat, 1 Nov 2025 15:33:06 +0800
Subject: [PATCH 20/31] LoongArch: Correct the cost of mulh.{w[u]/d[u]}

gcc/ChangeLog:

	* config/loongarch/loongarch.cc (loongarch_rtx_costs):
	Correct the cost of mulh.{w[u]|d[u]}.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/mulh_wu.c: New test.
---
 gcc/config/loongarch/loongarch.cc            | 27 +++++++++++++++++++-
 gcc/testsuite/gcc.target/loongarch/mulh_wu.c | 10 ++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/mulh_wu.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 4690c2a2d96..aae2e4138ba 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3901,9 +3901,34 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
 				      speed);
       return true;
 
+    case LSHIFTRT:
+      /* Correct the cost of mulh.{w[u]/d[u]}.  */
+      if (outer_code == TRUNCATE && CONST_INT_P (XEXP (x, 1))
+	  && INTVAL (XEXP (x, 1)) == (GET_MODE_BITSIZE (mode) / 2)
+	  && GET_CODE (XEXP (x, 0)) == MULT
+	  && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
+	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
+	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
+		  && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))
+	  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == REG
+	  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == REG)
+	{
+	  if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SImode
+	      && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == SImode)
+	    {
+	      *total = loongarch_cost->int_mult_si;
+	      return true;
+	    }
+	  if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
+	      && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == DImode)
+	    {
+	      *total = loongarch_cost->int_mult_di;
+	      return true;
+	    }
+	}
+      /* Fall through.  */
     case ASHIFT:
     case ASHIFTRT:
-    case LSHIFTRT:
     case ROTATE:
     case ROTATERT:
       if (CONSTANT_P (XEXP (x, 1)))
diff --git a/gcc/testsuite/gcc.target/loongarch/mulh_wu.c b/gcc/testsuite/gcc.target/loongarch/mulh_wu.c
new file mode 100644
index 00000000000..53fc518313c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mulh_wu.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { loongarch64*-*-* } } } */
+/* { dg-options "-O3 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "\tmulh.wu" } } */
+/* { dg-final { scan-assembler-not "\tlu32i.d" } } */
+
+unsigned int
+test (unsigned int *a)
+{
+  return *a / 60;
+}
-- 
Gitee


From ab8ec5fd02d5dca8b236284b928d95cda257bde8 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 4 Nov 2025 21:03:18 +0800
Subject: [PATCH 21/31] LoongArch: Switch the default code model to medium
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It has turned out the normal code model isn't enough for some large
LoongArch link units in practice.  Quoting WANG Rui's comment [1]:

We’ve actually been considering pushing for a change to the default
code model for LoongArch compilers (including GCC) for a while now.
In fact, this was one of the topics discussed in yesterday’s internal
compiler tool-chain meeting. The reason we haven’t moved forward with
it yet is that the medium code model generates a R_LARCH_CALL36
relocation, which had some issues with earlier versions of the linker.
We need to assess the impact on users before proceeding with the change.

In GCC we have build-time probe for linker call36 support and if the
linker does not support it, we fall back to pcalau12i + jirl or
la.{local,global} + jirl for the medium code model.  I also had some
concern about a potential performance regression caused by the
conservative nature of the relaxation process, but when I tested this
patch it turned out the relaxation is powerful enough to eliminate all
the pcaddu18i instructions in cc1plus and libstdc++.so.

The Loong Arch Linux project has been using -mcmodel=medium in their
{C,CXX}FLAGS building packages for a while [2] and they've not reported
any issues with that.

The Linux kernel developers has already anticipated the change and
explicitly specified -mcmodel=normal for a while [3].

Thus to me it's safe to make GCC 16 the first release with the medium
code model as the default now.  If someone must keep the normal code
model as the default for any reason, it's possible to configure GCC
using --with-cmodel=normal.

[1]: https://discourse.llvm.org/t/rfc-changing-the-default-code-model-for-loongarch/85317/3
[2]: https://github.com/lcpu-club/loongarch-packages/pull/340
[3]: https://git.kernel.org/torvalds/c/e67e0eb6a98b

gcc/

	* config.gcc: Support --with-cmodel={medium,normal} and make
	medium the default for LoongArch, define TARGET_DEFAULT_CMODEL
	as the selected value.
	* config/loongarch/loongarch-opts.cc: Use TARGET_DEFAULT_CMODEL
	instead of hard coding CMODEL_NORMAL.
	* doc/install.texi: Document that --with-cmodel= is supported
	for LoongArch.
	* doc/invoke.texi: Update the document about default code model
	on LoongArch.

gcc/testsuite/

	* gcc.target/loongarch/vect-frint-no-inexact.c (dg-options): Add
	-mcmodel=normal.
	* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: Likewise.
	* gcc.target/loongarch/vect-frint-scalar.c: Likewise.
	* gcc.target/loongarch/vect-frint.c: Likewise.
	* gcc.target/loongarch/vect-ftint-no-inexact.c: Likewise.
	* gcc.target/loongarch/vect-ftint.c: Likewise.
---
 gcc/config.gcc                                 | 18 +++++++++++++++++-
 gcc/config/loongarch/loongarch-opts.cc         |  2 +-
 .../loongarch/vect-frint-no-inexact.c          |  2 +-
 .../loongarch/vect-frint-scalar-no-inexact.c   |  2 +-
 .../gcc.target/loongarch/vect-frint-scalar.c   |  2 +-
 .../gcc.target/loongarch/vect-frint.c          |  2 +-
 .../loongarch/vect-ftint-no-inexact.c          |  2 +-
 .../gcc.target/loongarch/vect-ftint.c          |  2 +-
 8 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 0d5eafa93ea..31f84b37d16 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4982,7 +4982,7 @@ case "${target}" in
 		;;
 
 	loongarch*-*)
-		supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls"
+		supported_defaults="abi arch tune fpu simd multilib-default strict-align-lib tls cmodel"
 
 		# Local variables
 		unset \
@@ -5403,6 +5403,22 @@ case "${target}" in
 		# Remove the excessive appending comma.
 		loongarch_multilib_list_c=${loongarch_multilib_list_c%,}
 		loongarch_multilib_list_make=${loongarch_multilib_list_make%,}
+
+		# Handle --with-cmodel.
+		# Make sure --with-cmodel is valid.  If it was not specified,
+		# use medium as the default value.
+		case "${with_cmodel}" in
+		"" | medium)
+			tm_defines="${tm_defines} TARGET_DEFAULT_CMODEL=CMODEL_MEDIUM"
+			;;
+		normal)
+			tm_defines="${tm_defines} TARGET_DEFAULT_CMODEL=CMODEL_NORMAL"
+			;;
+		*)
+			echo "invalid option for --with-cmodel: '${with_cmodel}', available values are 'medium' and 'normal'" 1>&2
+			exit 1
+			;;
+		esac
 		;;
 
 	nds32*-*-*)
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
index ef877ae167a..80a5eb87a43 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -540,7 +540,7 @@ fallback:
 
 
   /* 5.  Target code model */
-  t.cmodel = constrained.cmodel ? target->cmodel : CMODEL_NORMAL;
+  t.cmodel = constrained.cmodel ? target->cmodel : TARGET_DEFAULT_CMODEL;
 
   switch (t.cmodel)
     {
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c
index 7bbaf1fba5a..e20eaea205a 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-no-inexact.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx -mcmodel=normal" } */
 
 #include "vect-frint.c"
 
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
index 002e3b92df7..d5f0933537d 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
+/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact -mcmodel=normal" } */
 
 #include "vect-frint-scalar.c"
 
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
index dbcb9065ad4..171ba98f00b 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mlsx -ffp-int-builtin-inexact" } */
+/* { dg-options "-O2 -mlsx -ffp-int-builtin-inexact -mcmodel=normal" } */
 
 #define test(func, suffix) \
 __typeof__ (1.##suffix) \
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint.c b/gcc/testsuite/gcc.target/loongarch/vect-frint.c
index 6bf211e7e98..bda041bdf91 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-frint.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx -mcmodel=normal" } */
 
 float out_x[8];
 double out_y[4];
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
index 61918beef5c..3fa97531d59 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx" } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact -mlasx -mcmodel=normal" } */
 
 #include "vect-ftint.c"
 
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c
index c4962ed1774..96da3cd7b57 100644
--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint.c
+++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx" } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact -mlasx -mcmodel=normal" } */
 
 int out_x[8];
 long out_y[4];
-- 
Gitee


From 226b976fe17306ff3b2c27a4c2a8e70e0d9ca7e1 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 3 Nov 2025 17:53:52 +0800
Subject: [PATCH 22/31] LoongArch: Fix PR122097 (2).

r16-4703 does not completely fix PR122097.  Floating-point vectors
were not processed in the function loongarch_const_vector_same_bytes_p.
This patch will completely resolve this issue.

	PR target/122097

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_const_vector_same_bytes_p): Add processing for
	floating-point vector data.
---
 gcc/config/loongarch/loongarch.cc | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index aae2e4138ba..c02b5a18cc8 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1783,7 +1783,27 @@ loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode)
 
   first = CONST_VECTOR_ELT (op, 0);
   bytes = GET_MODE_UNIT_SIZE (mode);
-  val = INTVAL (first);
+
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
+    {
+      rtx val_s = CONST_VECTOR_ELT (op, 0);
+      const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s);
+      if (GET_MODE (val_s) == DFmode)
+	{
+	  long tmp[2];
+	  REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp);
+	  val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0];
+	}
+      else
+	{
+	  long tmp;
+	  REAL_VALUE_TO_TARGET_SINGLE (*x, tmp);
+	  val = (unsigned HOST_WIDE_INT) tmp;
+	}
+    }
+  else
+    val = UINTVAL (first);
+
   first_byte = val & 0xff;
   for (i = 1; i < bytes; i++)
     {
-- 
Gitee


From e29f2ae6fddb6a8e5a95df8acace8dbdf903a97a Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 14 Oct 2025 16:20:04 +0800
Subject: [PATCH 23/31] LoongArch: Fix ICE for illegal strings in the target
 attribute.

Modify the two situations:

1. __attribute__ ((target ("arch")))
ICE will be reported before modification, and there will be an error
prompt after modification.

2. __attribute__ ((target ("arch=12345")))
Fixed the issue where the attribute string was not printed completely
in the previous error message.

gcc/ChangeLog:

	* config/loongarch/loongarch-target-attr.cc
	(loongarch_process_one_target_attr): Fix ICE.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/attr-check-error-message.c: Add tests.
---
 gcc/config/loongarch/loongarch-target-attr.cc          | 10 ++++++++--
 .../gcc.target/loongarch/attr-check-error-message.c    |  8 ++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc
index cb537446dff..922aa0483b5 100644
--- a/gcc/config/loongarch/loongarch-target-attr.cc
+++ b/gcc/config/loongarch/loongarch-target-attr.cc
@@ -203,7 +203,13 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc)
 	  /* Use the option setting machinery to set an option to an enum.  */
 	  case loongarch_attr_enum:
 	    {
-	      gcc_assert (arg);
+	      if (!arg)
+		{
+		  error_at (loc, "the value of pragma or attribute "
+			    "%<target(\"%s\")%> not be empty", str_to_check);
+		  return false;
+		}
+
 	      bool valid;
 	      int value;
 	      struct cl_decoded_option decoded;
@@ -244,7 +250,7 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc)
      were malformed we will have returned false already.  */
   if (!found)
     error_at (loc, "attribute %<target%> argument %qs is unknown",
-	      str_to_check);
+	      arg_str);
 
   return found;
 }
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c
index 82dcd172555..6420f332110 100644
--- a/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c
+++ b/gcc/testsuite/gcc.target/loongarch/attr-check-error-message.c
@@ -28,3 +28,11 @@ test6 (void)	/* { dg-error "attribute \\\'target\\\' argument not a string" } */
 __attribute__ ((target ("lsx,"))) void
 test7 (void)	/* { dg-error "malformed \\\'target\\\(\\\"lsx,\\\"\\\)\\\' pragma or attribute" } */
 {}
+
+__attribute__ ((target ("arch"))) void
+test8 (void)	/* { dg-error "the value of pragma or attribute \\\'target\\\(\\\"arch\\\"\\\)\\\' not be empty" } */
+{}
+
+__attribute__ ((target ("lsx;priority=1"))) void
+test9 (void)	/* { dg-error "attribute \\\'target\\\' argument \\\'lsx;priority=1\\\' is unknown" } */
+{}
-- 
Gitee


From 01a2da7c927d541d73c3f2014a7cf10ab4ca7c88 Mon Sep 17 00:00:00 2001
From: zhaozhou <zhaozhou@loongson.cn>
Date: Mon, 10 Nov 2025 15:38:26 +0800
Subject: [PATCH 24/31] LoongArch: Fix issue where data marked as GTY is
 cleaned up by ggc.

As for GGC(GCC Garbage Collection), it's use gengtype tool to scan all
source files contain the GTY mark, and generate gt-*.h files. GGC
traversal these file to find gt_root node and marks these objects that
directly or indirectly reference this node as active, then clean up
unmarked object's memory.

For the loongarch-builtins.cc file, it is necessary to add
target_gtfiles in config.gcc to generate gt-loongarch-builtins.h, and
include this header file in the .cc file, prevented the data marked
with GTY in this `.cc` file cleaned up by ggc.

gcc/ChangeLog:

	* config.gcc: Add target_gtfiles.
	* config/loongarch/loongarch-builtins.cc: Add header file.
---
 gcc/config.gcc                             | 1 +
 gcc/config/loongarch/loongarch-builtins.cc | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 31f84b37d16..ec22d4b3e78 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -460,6 +460,7 @@ loongarch*-*-*)
 	extra_headers="larchintrin.h lsxintrin.h lasxintrin.h"
 	extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o loongarch-evolution.o loongarch-target-attr.o"
 	extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o"
+	target_gtfiles="\$(srcdir)/config/loongarch/loongarch-builtins.cc"
 	extra_options="${extra_options} g.opt fused-madd.opt"
 	;;
 nds32*)
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index 8492a5bda95..5d06a4d88c2 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -3106,3 +3106,5 @@ loongarch_build_builtin_va_list (void)
 {
   return ptr_type_node;
 }
+
+#include "gt-loongarch-builtins.h"
-- 
Gitee


From d03acd1accac2f6316b60288ecde2ccc0ca236d6 Mon Sep 17 00:00:00 2001
From: zhaozhou <zhaozhou@loongson.cn>
Date: Mon, 10 Nov 2025 15:20:26 +0800
Subject: [PATCH 25/31] LoongArch: Fix predicate for
 symbolic_pcrel_offset_operand.

The predicate checks if the operand is PLUS(symbol_ref, const_int), but
the match (match_operand 0/1) is not equal XEXP(op, 0/1). It should be
adjusted to use match_test and pass XEXP(op, 0/1) into the constraint
function.

gcc/ChangeLog:

	* config/loongarch/predicates.md: Update ops.
---
 gcc/config/loongarch/predicates.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 8d9f92e2263..99bed32091b 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -570,8 +570,8 @@
 
 (define_predicate "symbolic_pcrel_offset_operand"
   (and (match_code "plus")
-       (match_operand 0 "symbolic_pcrel_operand")
-       (match_operand 1 "const_int_operand")))
+       (match_test "symbolic_pcrel_operand (XEXP (op, 0), mode)")
+       (match_test "const_int_operand (XEXP (op, 1), mode)")))
 
 (define_predicate "mem_simple_ldst_operand"
   (match_code "mem")
-- 
Gitee


From d498634d1f5240d6362d8b0925b6ae411c9aad50 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 16 Nov 2025 00:30:43 +0800
Subject: [PATCH 26/31] LoongArch: Fix wrong code from
 loongarch_expand_vec_perm_1 [PR 122695]

	PR target/122695

gcc/

	* config/loongarch/loongarch.cc (loongarch_expand_vec_perm_1):
	Simplify and fix the logic preventing the xvshuf.* unpredictable
	behavior.

gcc/testsuite/

	* gcc.target/loongarch/pr122695-1.c: New test.
	* gcc.target/loongarch/pr122695-2.c: New test.
---
 gcc/config/loongarch/loongarch.cc             | 56 ++-----------------
 .../gcc.target/loongarch/pr122695-1.c         | 22 ++++++++
 .../gcc.target/loongarch/pr122695-2.c         | 22 ++++++++
 3 files changed, 50 insertions(+), 50 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr122695-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr122695-2.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index c02b5a18cc8..a06e29ffc7b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -8934,57 +8934,13 @@ loongarch_expand_vec_perm_1 (rtx operands[])
   /* Number of elements in the vector.  */
   w = GET_MODE_NUNITS (mode);
 
-  rtx round_data[MAX_VECT_LEN];
-  rtx round_reg, round_data_rtx;
-
-  if (mode != E_V32QImode)
+  /* If we are using xvshuf.*, clamp the selector to avoid unpredictable
+     output.  */
+  if (maskmode != V8SImode && maskmode != V4DImode)
     {
-      for (int i = 0; i < w; i += 1)
-	{
-	  round_data[i] = GEN_INT (0x1f);
-	}
-
-      if (mode == E_V4DFmode)
-	{
-	  round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode,
-						 gen_rtvec_v (w, round_data));
-	  round_reg = gen_reg_rtx (E_V4DImode);
-	}
-      else if (mode == E_V8SFmode)
-	{
-
-	  round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode,
-						 gen_rtvec_v (w, round_data));
-	  round_reg = gen_reg_rtx (E_V8SImode);
-	}
-      else
-	{
-	  round_data_rtx = gen_rtx_CONST_VECTOR (mode,
-						 gen_rtvec_v (w, round_data));
-	  round_reg = gen_reg_rtx (mode);
-	}
-
-      emit_move_insn (round_reg, round_data_rtx);
-      switch (mode)
-	{
-	case E_V32QImode:
-	  emit_insn (gen_andv32qi3 (mask, mask, round_reg));
-	  break;
-	case E_V16HImode:
-	  emit_insn (gen_andv16hi3 (mask, mask, round_reg));
-	  break;
-	case E_V8SImode:
-	case E_V8SFmode:
-	  emit_insn (gen_andv8si3 (mask, mask, round_reg));
-	  break;
-	case E_V4DImode:
-	case E_V4DFmode:
-	  emit_insn (gen_andv4di3 (mask, mask, round_reg));
-	  break;
-	default:
-	  gcc_unreachable ();
-	  break;
-	}
+      rtx t = gen_const_vec_duplicate (maskmode, GEN_INT (0x1f));
+      mask = expand_binop (maskmode, and_optab, mask, t, NULL_RTX, false,
+			   OPTAB_DIRECT);
     }
 
   if (mode == V4DImode || mode == V4DFmode)
diff --git a/gcc/testsuite/gcc.target/loongarch/pr122695-1.c b/gcc/testsuite/gcc.target/loongarch/pr122695-1.c
new file mode 100644
index 00000000000..d7c906cc0ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr122695-1.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mlasx" } */
+/* { dg-require-effective-target loongarch_asx_hw } */
+
+[[gnu::vector_size (32)]] char a, b, c;
+
+[[gnu::noipa]] void
+test (void)
+{
+  a = __builtin_shuffle (a, b, c);
+}
+
+int
+main (void)
+{
+  a = (typeof (a)){} + 5;
+  b = (typeof (a)){} + 6;
+  c = (typeof (a)){} + 64;
+  test ();
+  if (a[0] != 5)
+    __builtin_trap ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/pr122695-2.c b/gcc/testsuite/gcc.target/loongarch/pr122695-2.c
new file mode 100644
index 00000000000..79fc139b55f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr122695-2.c
@@ -0,0 +1,22 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mlasx" } */
+/* { dg-require-effective-target loongarch_asx_hw } */
+
+[[gnu::vector_size(32)]] short a, b, c;
+
+[[gnu::noipa]] void
+test (void)
+{
+  a = __builtin_shuffle(a, b, c) + c;
+}
+
+int
+main (void)
+{
+  a = (typeof (a)){} + 1;
+  b = (typeof (b)){} + 2;
+  c = (typeof (c)){} + 128;
+  test ();
+  if (a[0] != 129)
+    __builtin_trap ();
+}
-- 
Gitee


From a27f2b823b25e12473396822e3e01f577c453687 Mon Sep 17 00:00:00 2001
From: zhaozhou <zhaozhou@loongson.cn>
Date: Fri, 14 Nov 2025 11:09:13 +0800
Subject: [PATCH 27/31] LoongArch: Fix operands[2] predicate of
 lsx_vreplvei_mirror.

UNSPEC_LSX_VREPLVEI_MIRROR describes the mirroring operation that copies
the lower 64 bits of a 128-bit register to the upper 64 bits. So in any
mode, the value range of op2 can only be 0 or 1 for the vreplvei.d insn.

gcc/ChangeLog:

	* config/loongarch/lsx.md: Fix predicate.
---
 gcc/config/loongarch/lsx.md | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index b1bccccb5e5..0246f2c3aab 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -1920,11 +1920,15 @@
   [(set_attr "type" "simd_splat")
    (set_attr "mode" "<MODE>")])
 
+;; UNSPEC_LSX_VREPLVEI_MIRROR describes the mirror operation that copies
+;; the lower 64 bits of a 128-bit register to the upper 64 bits. It is only
+;; called when the high half-part is the same as the low.
+
 (define_insn "lsx_vreplvei_mirror_<lsxfmt_f>"
   [(set (match_operand:LSX 0 "register_operand" "=f")
 	(unspec: LSX [(match_operand:LSX 1 "register_operand" "f")
-				(match_operand 2 "const_<indeximm>_operand" "")]
-				UNSPEC_LSX_VREPLVEI_MIRROR))]
+		      (match_operand 2 "const_0_or_1_operand" "")]
+		      UNSPEC_LSX_VREPLVEI_MIRROR))]
   "ISA_HAS_LSX"
   "vreplvei.d\t%w0,%w1,%2"
   [(set_attr "type" "simd_splat")
-- 
Gitee


From 46ba635a9ffeba378d67d739795bf95c8c710bdd Mon Sep 17 00:00:00 2001
From: DengJianbo <dengjianbo@loongson.cn>
Date: Tue, 13 Jan 2026 15:21:54 +0800
Subject: [PATCH 28/31] LoongArch: Fix ICE when explicit-relocs is none

When set -mexplicit-relocs=none, the symbol address should be caclulated
by macro instructions, for example la.local. Due to the condition
TARGET_CMODEL_EXTREME in movdi_symbolic_off64, this template can not be
matched in case the cmodel is normal. If the variable has attribute
model("extreme"), gcc will get crashed with error unrecognizable insns.
This patch fix this issue by removing TARGET_CMODEL_EXTREME, since it
already checked in prediction symbolic_off64_or_reg_operand.

gcc/ChangeLog:

	* config/loongarch/loongarch.md: Remove condition in template
	movdi_symbolic_off64.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/la64/attr-model-6.c: New test.
---
 gcc/config/loongarch/loongarch.md                      | 2 +-
 gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c

diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index fd59ec9225e..86947547bbe 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2256,7 +2256,7 @@
   (unspec:DI [(const_int 0)]
     UNSPEC_LOAD_SYMBOL_OFFSET64)
   (clobber (match_operand:DI 2 "register_operand" "=&r,r"))]
- "TARGET_64BIT && TARGET_CMODEL_EXTREME"
+ "TARGET_64BIT"
 {
   if (which_alternative == 1)
     return "#";
diff --git a/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c b/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c
new file mode 100644
index 00000000000..9fa32a67bf4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/la64/attr-model-6.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-mexplicit-relocs=none -mcmodel=normal -O2 -fno-pic" } */
+/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,x" } } */
+/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,y" } } */
+/* { dg-final { scan-assembler "la.local\t\\\$r\[0-9\]+,\\\$r\[0-9\]+,counter" } } */
+
+#define ATTR_MODEL_TEST
+#include "attr-model-test.c"
-- 
Gitee


From 957b49327725d31979ab42fb1685e040dafa849d Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 12 Jan 2026 17:23:41 +0800
Subject: [PATCH 29/31] LoongArch: Fix bug123521.

In the vector initialization process, optimization can be performed
if it can be determined that all elements are the same, or if the
upper or lower halves are identical.  However, during this
optimization, when the identical element is an immediate value
larger than 10 bits, an internal compiler error (ICE) occurs.
The reason is that in such cases, the function
`simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0)` is
called, where `imode` is `E_DImode`.  The mode of `reg_tmp` in
`GET_MODE (reg_tmp)` is taken from the immediate value's mode,
which is `E_VOIDmode`.  This results in a move from `E_VOIDmode`
to `E_DImode`, an operation not supported by LoongArch.

	PR target/123521

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_expand_vector_init_same): Fixed a bug in the
	vector initialization section..

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/lasx/pr123521.c: New test.

Suggested-by: Xi Ruoyao <xry111@xry111.site>
---
 gcc/config/loongarch/loongarch.cc             | 27 +------------------
 .../loongarch/vector/lasx/pr123521.c          | 11 ++++++++
 2 files changed, 12 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c

diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index a06e29ffc7b..16b9f5b870b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -10150,32 +10150,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
 	}
     }
 
-  if (imode == GET_MODE (same))
-    temp = same;
-  else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
-    {
-      if (GET_CODE (same) == MEM)
-	{
-	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
-	  loongarch_emit_move (reg_tmp, same);
-	  temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
-	}
-      else
-	temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
-    }
-  else
-    {
-      if (GET_CODE (same) == MEM)
-	{
-	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
-	  loongarch_emit_move (reg_tmp, same);
-	  temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
-	}
-      else
-	temp = lowpart_subreg (imode, same, GET_MODE (same));
-    }
-
-  temp = force_reg (imode, temp);
+  temp = force_reg (imode, same);
 
   switch (vmode)
     {
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c
new file mode 100644
index 00000000000..9ccf391d38d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123521.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msimd=lasx" } */
+
+typedef long long v4i64 __attribute__ ((vector_size (32), aligned (32)));
+extern long long *x_si;
+v4i64
+test (void)
+{
+  v4i64 a = { x_si[1], x_si[0], 0x411, 0x411 };
+  return a;
+}
-- 
Gitee


From 869d6e0ea703acb8f0b4e0a2b6e5df11539094ec Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Sat, 17 Jan 2026 15:12:46 +0800
Subject: [PATCH 30/31] LoongArch: Fix bug117575.

In the template "vec_set<mode>", a call is made to
"lasx_xvinsve0_<lasxfmt_f>_scalar", but there is an issue due to
the different ranges of operand1 between the two templates.
The range of operand1 in the template
"lasx_xvinsve0_<lasxfmt_f>_scalar" is now set to be the same as
that in "vec_set<mode>".

	PR target/117575

gcc/ChangeLog:

	* config/loongarch/lasx.md: Modify the range of operand1.

gcc/testsuite/ChangeLog:

	* g++.target/loongarch/pr117575.C: New test.
---
 gcc/config/loongarch/lasx.md                  |  2 +-
 gcc/testsuite/g++.target/loongarch/pr117575.C | 81 +++++++++++++++++++
 2 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.target/loongarch/pr117575.C

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 15142f8949e..c2aefea273e 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -4524,7 +4524,7 @@
   [(set (match_operand:FLASX 0 "register_operand" "=f")
     (vec_merge:FLASX
       (vec_duplicate:FLASX
-        (match_operand:<UNITMODE> 1 "register_operand" "f"))
+        (match_operand:<UNITMODE> 1 "reg_or_0_operand" "f"))
       (match_operand:FLASX 2 "register_operand" "0")
       (match_operand 3 "const_<bitmask256>_operand" "")))]
   "ISA_HAS_LASX"
diff --git a/gcc/testsuite/g++.target/loongarch/pr117575.C b/gcc/testsuite/g++.target/loongarch/pr117575.C
new file mode 100644
index 00000000000..1870d4c890a
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/pr117575.C
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+
+typedef long unsigned int size_t;
+typedef unsigned char simde__mmask8;
+typedef double simde_float64;
+typedef simde_float64 simde__m512d __attribute__ ((__aligned__ ((64))))
+__attribute__ ((__vector_size__ (64))) __attribute__ ((__may_alias__));
+typedef simde_float64 simde__m256d __attribute__ ((__aligned__ ((32))))
+__attribute__ ((__vector_size__ (32))) __attribute__ ((__may_alias__));
+simde__m512d simde_mm512_set_pd (simde_float64 e7, simde_float64 e6,
+                                 simde_float64 e5, simde_float64 e4,
+                                 simde_float64 e3, simde_float64 e2,
+                                 simde_float64 e1, simde_float64 e0);
+simde__m256d simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a);
+int simde_test_x86_assert_equal_f64x4_ (simde__m256d a, simde__m256d b);
+
+typedef union
+{
+
+  __attribute__ ((__aligned__ ((32)))) simde_float64 f64
+      __attribute__ ((__vector_size__ (32))) __attribute__ ((__may_alias__));
+} simde__m256d_private;
+__attribute__ ((__always_inline__)) inline static simde__m256d
+simde__m256d_from_private (simde__m256d_private v)
+{
+  simde__m256d r;
+  __builtin_memcpy (&r, &v, sizeof (r));
+  return r;
+}
+simde__m256d
+simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1,
+                    simde_float64 e0)
+{
+
+  simde__m256d_private r_;
+
+  r_.f64[0] = e0;
+  r_.f64[1] = e1;
+  r_.f64[2] = e2;
+  r_.f64[3] = e3;
+
+  return simde__m256d_from_private (r_);
+}
+
+simde__m256d simde_mm512_extractf64x4_pd (simde__m512d a, int imm8);
+int
+test_simde_mm512_maskz_extractf64x4_pd (void)
+{
+  const struct
+  {
+    simde__mmask8 k;
+    simde__m512d a;
+    simde__m256d r0;
+    simde__m256d r1;
+  } test_vec[2] = {
+    { 21,
+      simde_mm512_set_pd (-139.11, -172.36, -268.86, 393.53, -71.72, 36.69,
+                          98.47, -135.52),
+      simde_mm256_set_pd (0.00, 36.69, 0.00, -135.52),
+      simde_mm256_set_pd (0.00, -172.36, 0.00, 393.53) },
+    { 150,
+      simde_mm512_set_pd (-556.90, 522.06, 160.98, -932.28, 391.82, 600.12,
+                          -569.99, -491.12),
+      simde_mm256_set_pd (0.00, 600.12, -569.99, 0.00),
+      simde_mm256_set_pd (0.00, 522.06, 160.98, 0.00) },
+  };
+
+  for (size_t i = 0; i < (sizeof (test_vec) / sizeof (test_vec[0])); i++)
+    {
+      simde__m256d r;
+      r = simde_mm256_maskz_mov_pd (
+          test_vec[i].k, simde_mm512_extractf64x4_pd (test_vec[i].a, 0));
+      if (simde_test_x86_assert_equal_f64x4_ (r, test_vec[i].r0))
+        {
+          return 1;
+        }
+    }
+
+  return 0;
+}
-- 
Gitee


From 15f0a0168f691764f0fa6e8108ca7d8b109b3bca Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 27 Jan 2026 10:31:36 +0800
Subject: [PATCH 31/31] LoongArch: Fix bug123766.

The pointer parameter type for the original store class builtin
functions is CVPOINTER (const volatile void *).
Taking the following test as an example:

	```
	v4i64 v = {0, 0, 0, 0};
	void try_store() {
  	  long r[4];
  	  __lasx_xvst(v, r, 0);
	}
	```
At this point, the type of r is CVPOINTER, which means data in memory
can only be read through r.  Therefore, if the array r is not
initialized, an uninitialized warning will be issued.
This patch changes the pointer type of store-class builtin functions
from CVPOINTER to VPOINTER (volatile void *).

	PR target/123766

gcc/ChangeLog:

	* config/loongarch/loongarch-builtins.cc
	(loongarch_build_vpointer_type): New function.  Return a type
	for 'volatile void *'.
	(LARCH_ATYPE_VPOINTER): New macro.
	* config/loongarch/loongarch-ftypes.def: Change the pointer
	type of the store class function from CVPOINTER to VPOINTER.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/lasx/pr123766.c: New test.
	* gcc.target/loongarch/vector/lsx/pr123766.c: New test.
---
 gcc/config/loongarch/loongarch-builtins.cc    | 38 +++++++++++++------
 gcc/config/loongarch/loongarch-ftypes.def     | 24 ++++++------
 .../loongarch/vector/lasx/pr123766.c          | 28 ++++++++++++++
 .../loongarch/vector/lsx/pr123766.c           | 28 ++++++++++++++
 4 files changed, 94 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c

diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index 5d06a4d88c2..5edbea34544 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -1408,10 +1408,10 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   LSX_BUILTIN (vfrintrp_d, LARCH_V2DF_FTYPE_V2DF),
   LSX_BUILTIN (vfrintrm_s, LARCH_V4SF_FTYPE_V4SF),
   LSX_BUILTIN (vfrintrm_d, LARCH_V2DF_FTYPE_V2DF),
-  LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI_UQI),
-  LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_CVPOINTER_SI_UQI),
-  LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_CVPOINTER_SI_UQI),
-  LSX_NO_TARGET_BUILTIN (vstelm_d, LARCH_VOID_FTYPE_V2DI_CVPOINTER_SI_UQI),
+  LSX_NO_TARGET_BUILTIN (vstelm_b, LARCH_VOID_FTYPE_V16QI_VPOINTER_SI_UQI),
+  LSX_NO_TARGET_BUILTIN (vstelm_h, LARCH_VOID_FTYPE_V8HI_VPOINTER_SI_UQI),
+  LSX_NO_TARGET_BUILTIN (vstelm_w, LARCH_VOID_FTYPE_V4SI_VPOINTER_SI_UQI),
+  LSX_NO_TARGET_BUILTIN (vstelm_d, LARCH_VOID_FTYPE_V2DI_VPOINTER_SI_UQI),
   LSX_BUILTIN (vaddwev_d_w, LARCH_V2DI_FTYPE_V4SI_V4SI),
   LSX_BUILTIN (vaddwev_w_h, LARCH_V4SI_FTYPE_V8HI_V8HI),
   LSX_BUILTIN (vaddwev_h_b, LARCH_V8HI_FTYPE_V16QI_V16QI),
@@ -1581,7 +1581,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   LSX_BUILTIN (vssrarni_du_q, LARCH_UV2DI_FTYPE_UV2DI_V2DI_USI),
   LSX_BUILTIN (vpermi_w, LARCH_V4SI_FTYPE_V4SI_V4SI_USI),
   LSX_BUILTIN (vld, LARCH_V16QI_FTYPE_CVPOINTER_SI),
-  LSX_NO_TARGET_BUILTIN (vst, LARCH_VOID_FTYPE_V16QI_CVPOINTER_SI),
+  LSX_NO_TARGET_BUILTIN (vst, LARCH_VOID_FTYPE_V16QI_VPOINTER_SI),
   LSX_BUILTIN (vssrlrn_b_h, LARCH_V16QI_FTYPE_V8HI_V8HI),
   LSX_BUILTIN (vssrlrn_h_w, LARCH_V8HI_FTYPE_V4SI_V4SI),
   LSX_BUILTIN (vssrlrn_w_d, LARCH_V4SI_FTYPE_V2DI_V2DI),
@@ -1592,7 +1592,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   LSX_BUILTIN (vldi, LARCH_V2DI_FTYPE_HI),
   LSX_BUILTIN (vshuf_b, LARCH_V16QI_FTYPE_V16QI_V16QI_V16QI),
   LSX_BUILTIN (vldx, LARCH_V16QI_FTYPE_CVPOINTER_DI),
-  LSX_NO_TARGET_BUILTIN (vstx, LARCH_VOID_FTYPE_V16QI_CVPOINTER_DI),
+  LSX_NO_TARGET_BUILTIN (vstx, LARCH_VOID_FTYPE_V16QI_VPOINTER_DI),
   LSX_BUILTIN (vextl_qu_du, LARCH_UV2DI_FTYPE_UV2DI),
 
   /* Built-in functions for LASX */
@@ -2121,11 +2121,11 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   LASX_BUILTIN (xvfrintrm_s, LARCH_V8SF_FTYPE_V8SF),
   LASX_BUILTIN (xvfrintrm_d, LARCH_V4DF_FTYPE_V4DF),
   LASX_BUILTIN (xvld, LARCH_V32QI_FTYPE_CVPOINTER_SI),
-  LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI),
-  LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_CVPOINTER_SI_UQI),
-  LASX_NO_TARGET_BUILTIN (xvstelm_h, LARCH_VOID_FTYPE_V16HI_CVPOINTER_SI_UQI),
-  LASX_NO_TARGET_BUILTIN (xvstelm_w, LARCH_VOID_FTYPE_V8SI_CVPOINTER_SI_UQI),
-  LASX_NO_TARGET_BUILTIN (xvstelm_d, LARCH_VOID_FTYPE_V4DI_CVPOINTER_SI_UQI),
+  LASX_NO_TARGET_BUILTIN (xvst, LARCH_VOID_FTYPE_V32QI_VPOINTER_SI),
+  LASX_NO_TARGET_BUILTIN (xvstelm_b, LARCH_VOID_FTYPE_V32QI_VPOINTER_SI_UQI),
+  LASX_NO_TARGET_BUILTIN (xvstelm_h, LARCH_VOID_FTYPE_V16HI_VPOINTER_SI_UQI),
+  LASX_NO_TARGET_BUILTIN (xvstelm_w, LARCH_VOID_FTYPE_V8SI_VPOINTER_SI_UQI),
+  LASX_NO_TARGET_BUILTIN (xvstelm_d, LARCH_VOID_FTYPE_V4DI_VPOINTER_SI_UQI),
   LASX_BUILTIN (xvinsve0_w, LARCH_V8SI_FTYPE_V8SI_V8SI_UQI),
   LASX_BUILTIN (xvinsve0_d, LARCH_V4DI_FTYPE_V4DI_V4DI_UQI),
   LASX_BUILTIN (xvpickve_w, LARCH_V8SI_FTYPE_V8SI_UQI),
@@ -2141,7 +2141,7 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   LASX_BUILTIN (xvorn_v, LARCH_UV32QI_FTYPE_UV32QI_UV32QI),
   LASX_BUILTIN (xvldi, LARCH_V4DI_FTYPE_HI),
   LASX_BUILTIN (xvldx, LARCH_V32QI_FTYPE_CVPOINTER_DI),
-  LASX_NO_TARGET_BUILTIN (xvstx, LARCH_VOID_FTYPE_V32QI_CVPOINTER_DI),
+  LASX_NO_TARGET_BUILTIN (xvstx, LARCH_VOID_FTYPE_V32QI_VPOINTER_DI),
   LASX_BUILTIN (xvextl_qu_du, LARCH_UV4DI_FTYPE_UV4DI),
 
   /* LASX */
@@ -2391,11 +2391,25 @@ loongarch_build_cvpointer_type (void)
   return cache;
 }
 
+/* Return a type for 'volatile void *'.  */
+
+static tree
+loongarch_build_vpointer_type (void)
+{
+  static tree cache;
+
+  if (cache == NULL_TREE)
+    cache = build_pointer_type (build_qualified_type (void_type_node,
+						      TYPE_QUAL_VOLATILE));
+  return cache;
+}
+
 /* Source-level argument types.  */
 #define LARCH_ATYPE_VOID void_type_node
 #define LARCH_ATYPE_INT integer_type_node
 #define LARCH_ATYPE_POINTER ptr_type_node
 #define LARCH_ATYPE_CVPOINTER loongarch_build_cvpointer_type ()
+#define LARCH_ATYPE_VPOINTER loongarch_build_vpointer_type ()
 #define LARCH_ATYPE_BOOLEAN boolean_type_node
 /* Standard mode-based argument types.  */
 #define LARCH_ATYPE_QI intQI_type_node
diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def
index c7f849e8805..ee6051ee1c4 100644
--- a/gcc/config/loongarch/loongarch-ftypes.def
+++ b/gcc/config/loongarch/loongarch-ftypes.def
@@ -497,10 +497,10 @@ DEF_LARCH_FTYPE (2, (VOID, SI, SI))
 DEF_LARCH_FTYPE (2, (VOID, UQI, SI))
 DEF_LARCH_FTYPE (2, (VOID, USI, UQI))
 DEF_LARCH_FTYPE (1, (VOID, UHI))
-DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, SI))
-DEF_LARCH_FTYPE (3, (VOID, V16QI, CVPOINTER, DI))
-DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, SI))
-DEF_LARCH_FTYPE (3, (VOID, V32QI, CVPOINTER, DI))
+DEF_LARCH_FTYPE (3, (VOID, V16QI, VPOINTER, SI))
+DEF_LARCH_FTYPE (3, (VOID, V16QI, VPOINTER, DI))
+DEF_LARCH_FTYPE (3, (VOID, V32QI, VPOINTER, SI))
+DEF_LARCH_FTYPE (3, (VOID, V32QI, VPOINTER, DI))
 DEF_LARCH_FTYPE (3, (VOID, V4DF, POINTER, SI))
 DEF_LARCH_FTYPE (3, (VOID, V2DF, POINTER, SI))
 DEF_LARCH_FTYPE (3, (VOID, V2DI, CVPOINTER, SI))
@@ -682,10 +682,10 @@ DEF_LARCH_FTYPE(3,(V4DI,V4DI,UV16HI,V16HI))
 DEF_LARCH_FTYPE(2,(UV8SI,UV32QI,UV32QI))
 DEF_LARCH_FTYPE(2,(V8SI,UV32QI,V32QI))
 
-DEF_LARCH_FTYPE(4,(VOID,V16QI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V8HI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V4SI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V2DI,CVPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V16QI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V8HI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V4SI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V2DI,VPOINTER,SI,UQI))
 
 DEF_LARCH_FTYPE (2, (DI, V16QI, UQI))
 DEF_LARCH_FTYPE (2, (DI, V8HI, UQI))
@@ -707,10 +707,10 @@ DEF_LARCH_FTYPE (3, (UV16HI, UV16HI, V16HI, USI))
 DEF_LARCH_FTYPE (3, (UV8SI, UV8SI, V8SI, USI))
 DEF_LARCH_FTYPE (3, (UV4DI, UV4DI, V4DI, USI))
 
-DEF_LARCH_FTYPE(4,(VOID,V32QI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V16HI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V8SI,CVPOINTER,SI,UQI))
-DEF_LARCH_FTYPE(4,(VOID,V4DI,CVPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V32QI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V16HI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V8SI,VPOINTER,SI,UQI))
+DEF_LARCH_FTYPE(4,(VOID,V4DI,VPOINTER,SI,UQI))
 
 DEF_LARCH_FTYPE (1, (BOOLEAN,V16QI))
 DEF_LARCH_FTYPE(2,(V16QI,CVPOINTER,CVPOINTER))
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c
new file mode 100644
index 00000000000..88170cd6aef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/pr123766.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlasx  -Werror -Wextra" } */
+
+#include <lasxintrin.h>
+
+__m256i v = {0, 0, 0, 0};
+
+#define TEST(NAME)			  \
+  void test_##NAME (void)		  \
+  {					  \
+    long long r[4];			  \
+    __lasx_##NAME (v, r, 0);		  \
+  }
+
+TEST (xvst);
+TEST (xvstx);
+
+#define TEST1(NAME, TYPE, NUM)		  \
+  void test_##NAME (void)		  \
+  {					  \
+    TYPE r[NUM];			  \
+    __lasx_##NAME (v, r, 0, 0);		  \
+  }
+
+TEST1 (xvstelm_b, char, 32);
+TEST1 (xvstelm_h, short, 16);
+TEST1 (xvstelm_w, int, 8);
+TEST1 (xvstelm_d, long long, 4);
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c
new file mode 100644
index 00000000000..54f57e61113
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/pr123766.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlsx  -Werror -Wextra" } */
+
+#include <lsxintrin.h>
+
+__m128i v = {0, 0};
+
+#define TEST(NAME)			  \
+  void test_##NAME (void)		  \
+  {					  \
+    long long r[2];			  \
+    __lsx_##NAME (v, r, 0);		  \
+  }
+
+TEST (vst);
+TEST (vstx);
+
+#define TEST1(NAME, TYPE, NUM)		  \
+  void test_##NAME (void)		  \
+  {					  \
+    TYPE r[NUM];			  \
+    __lsx_##NAME (v, r, 0, 0);		  \
+  }
+
+TEST1 (vstelm_b, char, 16);
+TEST1 (vstelm_h, short, 8);
+TEST1 (vstelm_w, int, 4);
+TEST1 (vstelm_d, long long, 2);
-- 
Gitee