From f1706a1c3cba38e0a51a1b6a74de11fe91b1611f Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Tue, 7 Jan 2025 13:11:37 +0800 Subject: [PATCH 1/6] timing(LoadUnit): fpWen and pdest reg out --- .../xiangshan/mem/pipeline/LoadUnit.scala | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index d008d2d11f..83a37034a2 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1147,6 +1147,9 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s2_pbmt = RegEnable(s1_pbmt, s1_fire) val s2_trigger_debug_mode = RegEnable(s1_trigger_debug_mode, false.B, s1_fire) val s2_nc_with_data = RegNext(s1_nc_with_data) + val s2_mmio_req = Wire(Valid(new MemExuOutput)) + s2_mmio_req.valid := RegNextN(io.lsq.uncache.fire, 2, Some(false.B)) + s2_mmio_req.bits := RegNextN(io.lsq.uncache.bits, 2) s2_kill := s2_in.uop.robIdx.needFlush(io.redirect) s2_ready := !s2_valid || s2_kill || s3_ready @@ -1481,7 +1484,8 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_alignedType = RegEnable(s2_out.alignedType, s2_fire) val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf - val s3_mmio = Wire(Valid(new MemExuOutput)) + val s3_mmio_req = RegEnable(s2_mmio_req, s2_fire) + val s3_pdest = RegEnable(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest), s2_fire) val s3_data_select = RegEnable(s2_data_select, 0.U(s2_data_select.getWidth.W), s2_fire) val s3_data_select_by_offset = RegEnable(s2_data_select_by_offset, 0.U.asTypeOf(s2_data_select_by_offset), s2_fire) val s3_hw_err = @@ -1502,8 +1506,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule XSError(s3_valid && s3_vec_mb_nack, "Merge buffer should always accept vector loads!") s3_ready := !s3_valid || s3_kill || io.ldout.ready - 
s3_mmio.valid := RegNextN(io.lsq.uncache.fire, 3, Some(false.B)) - s3_mmio.bits := RegNextN(io.lsq.uncache.bits, 3) + // forwrad last beat val s3_fast_rep_canceled = io.replay.valid && io.replay.bits.forward_tlDchannel || io.misalign_ldin.valid || !io.dcache.req.ready @@ -1647,7 +1650,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule // TODO: vector wakeup? io.ldCancel.ld2Cancel := s3_valid && !s3_safe_wakeup && !s3_isvec && (!s3_frm_mabuf || s3_in.misalignNeedWakeUp) - val s3_ld_wb_meta = Mux(s3_valid, s3_out.bits, s3_mmio.bits) + val s3_ld_wb_meta = Mux(s3_valid, s3_out.bits, s3_mmio_req.bits) // data from load queue refill val s3_ld_raw_data_frm_mmio = RegNextN(io.lsq.ld_raw_data, 3) @@ -1744,15 +1747,15 @@ class LoadUnit(implicit p: Parameters) extends XSModule // FIXME: add 1 cycle delay ? // io.lsq.uncache.ready := !s3_valid val s3_outexception = ExceptionNO.selectByFu(s3_out.bits.uop.exceptionVec, LduCfg).asUInt.orR && s3_vecActive + io.ldout.valid := s3_mmio_req.valid || s3_valid io.ldout.bits := s3_ld_wb_meta io.ldout.bits.data := Mux(s3_valid, s3_ld_data_frm_pipe, s3_ld_data_frm_mmio) - - io.ldout.valid := (s3_mmio.valid || - (s3_out.valid && !s3_vecout.isvec && !s3_frm_mabuf)) + io.ldout.bits.uop.pdest := s3_pdest io.ldout.bits.uop.exceptionVec := ExceptionNO.selectByFu(s3_ld_wb_meta.uop.exceptionVec, LduCfg) io.ldout.bits.isFromLoadUnit := true.B // TODO vector? 
io.ldout.bits.uop.rfWen := !io.ldCancel.ld2Cancel && s3_ld_wb_meta.uop.rfWen + io.ldout.bits.uop.fpWen := !io.ldCancel.ld2Cancel && s3_ld_wb_meta.uop.fpWen io.ldout.bits.uop.fuType := Mux( s3_valid && s3_isvec, FuType.vldu.U, @@ -1766,7 +1769,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule // io.lsq.uncache.valid && !io.lsq.uncache.bits.uop.robIdx.needFlush(io.redirect) && !s3_out.valid && !io.lsq.uncache.bits.isVls // io.ldout.bits.data := Mux(s3_out.valid, s3_ld_data_frm_pipe, s3_ld_data_frm_mmio) // io.ldout.valid := s3_out.valid && !s3_out.bits.uop.robIdx.needFlush(io.redirect) || - // s3_mmio.valid && !s3_mmio.bits.uop.robIdx.needFlush(io.redirect) && !s3_out.valid + // s3_mmio_req.valid && !s3_mmio_req.bits.uop.robIdx.needFlush(io.redirect) && !s3_out.valid // s3 load fast replay io.fast_rep_out.valid := s3_valid && s3_fast_rep From 8dd642f9b202f1af43b303e4df242e5157c9b749 Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Tue, 7 Jan 2025 13:51:55 +0800 Subject: [PATCH 2/6] fix(LoadUnit): s3_mmio_req enable condition --- src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 83a37034a2..e1955819a1 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1484,7 +1484,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_alignedType = RegEnable(s2_out.alignedType, s2_fire) val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf - val s3_mmio_req = RegEnable(s2_mmio_req, s2_fire) + val s3_mmio_req = RegEnable(s2_mmio_req, s2_mmio_req.valid) val s3_pdest = RegEnable(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest), s2_fire) val s3_data_select = RegEnable(s2_data_select, 0.U(s2_data_select.getWidth.W), s2_fire) val 
s3_data_select_by_offset = RegEnable(s2_data_select_by_offset, 0.U.asTypeOf(s2_data_select_by_offset), s2_fire) From 4bb61ab722113d810b65629397c1f9d64fc38fcd Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Tue, 7 Jan 2025 13:53:34 +0800 Subject: [PATCH 3/6] fix(LoadUnit): s3_mmio_req use RegNext instead of RegEnable --- src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index e1955819a1..01ef2d9d63 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1484,7 +1484,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_alignedType = RegEnable(s2_out.alignedType, s2_fire) val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf - val s3_mmio_req = RegEnable(s2_mmio_req, s2_mmio_req.valid) + val s3_mmio_req = RegNext(s2_mmio_req) val s3_pdest = RegEnable(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest), s2_fire) val s3_data_select = RegEnable(s2_data_select, 0.U(s2_data_select.getWidth.W), s2_fire) val s3_data_select_by_offset = RegEnable(s2_data_select_by_offset, 0.U.asTypeOf(s2_data_select_by_offset), s2_fire) From 063074b0aa2acb2037d802b078aaa7aabcd53ba3 Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Tue, 7 Jan 2025 15:09:58 +0800 Subject: [PATCH 4/6] fix(LoadUnit): fix fpWen and rfWen enable condition --- src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 01ef2d9d63..98bb383a70 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1485,7 +1485,9 @@ 
class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf val s3_mmio_req = RegNext(s2_mmio_req) - val s3_pdest = RegEnable(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest), s2_fire) + val s3_pdest = RegNext(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest)) + val s3_rfWen = RegEnable(Mux(s2_valid, s2_out.uop.rfWen, s2_mmio_req.bits.uop.rfWen), s2_valid || s2_mmio_req.valid) + val s3_fpWen = RegEnable(Mux(s2_valid, s2_out.uop.fpWen, s2_mmio_req.bits.uop.fpWen), s2_valid || s2_mmio_req.valid) val s3_data_select = RegEnable(s2_data_select, 0.U(s2_data_select.getWidth.W), s2_fire) val s3_data_select_by_offset = RegEnable(s2_data_select_by_offset, 0.U.asTypeOf(s2_data_select_by_offset), s2_fire) val s3_hw_err = @@ -1750,12 +1752,11 @@ class LoadUnit(implicit p: Parameters) extends XSModule io.ldout.valid := s3_mmio_req.valid || s3_valid io.ldout.bits := s3_ld_wb_meta io.ldout.bits.data := Mux(s3_valid, s3_ld_data_frm_pipe, s3_ld_data_frm_mmio) + io.ldout.bits.uop.rfWen := !io.ldCancel.ld2Cancel && s3_rfWen + io.ldout.bits.uop.fpWen := !io.ldCancel.ld2Cancel && s3_fpWen io.ldout.bits.uop.pdest := s3_pdest io.ldout.bits.uop.exceptionVec := ExceptionNO.selectByFu(s3_ld_wb_meta.uop.exceptionVec, LduCfg) io.ldout.bits.isFromLoadUnit := true.B - // TODO vector? 
- io.ldout.bits.uop.rfWen := !io.ldCancel.ld2Cancel && s3_ld_wb_meta.uop.rfWen - io.ldout.bits.uop.fpWen := !io.ldCancel.ld2Cancel && s3_ld_wb_meta.uop.fpWen io.ldout.bits.uop.fuType := Mux( s3_valid && s3_isvec, FuType.vldu.U, From 0e0c5fb247716bcbbae214762adb8a515dc6bf34 Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Tue, 7 Jan 2025 16:04:06 +0800 Subject: [PATCH 5/6] fix(LoadUnit): fix ldout.valid generate logic --- src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 98bb383a70..10f3f7b11d 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1484,6 +1484,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_alignedType = RegEnable(s2_out.alignedType, s2_fire) val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf + val s3_ldout_valid = RegNext(s2_mmio_req.valid || s2_valid && !s2_out.isvec && !s2_out.isFrmMisAlignBuf) val s3_mmio_req = RegNext(s2_mmio_req) val s3_pdest = RegNext(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest)) val s3_rfWen = RegEnable(Mux(s2_valid, s2_out.uop.rfWen, s2_mmio_req.bits.uop.rfWen), s2_valid || s2_mmio_req.valid) @@ -1749,7 +1750,7 @@ class LoadUnit(implicit p: Parameters) extends XSModule // FIXME: add 1 cycle delay ? 
// io.lsq.uncache.ready := !s3_valid val s3_outexception = ExceptionNO.selectByFu(s3_out.bits.uop.exceptionVec, LduCfg).asUInt.orR && s3_vecActive - io.ldout.valid := s3_mmio_req.valid || s3_valid + io.ldout.valid := s3_ldout_valid io.ldout.bits := s3_ld_wb_meta io.ldout.bits.data := Mux(s3_valid, s3_ld_data_frm_pipe, s3_ld_data_frm_mmio) io.ldout.bits.uop.rfWen := !io.ldCancel.ld2Cancel && s3_rfWen From 11b555a2d29d5a2e2ff2aeec47b3af53efe9d374 Mon Sep 17 00:00:00 2001 From: sfencevma <15155930562@163.com> Date: Wed, 8 Jan 2025 13:17:00 +0800 Subject: [PATCH 6/6] fix(LoadUnit): fix ldout.valid generate logic --- src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 10f3f7b11d..fa4e38dd88 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -1484,7 +1484,6 @@ class LoadUnit(implicit p: Parameters) extends XSModule val s3_vec_alignedType = RegEnable(s2_out.alignedType, s2_fire) val s3_vec_mBIndex = RegEnable(s2_out.mbIndex, s2_fire) val s3_frm_mabuf = s3_in.isFrmMisAlignBuf - val s3_ldout_valid = RegNext(s2_mmio_req.valid || s2_valid && !s2_out.isvec && !s2_out.isFrmMisAlignBuf) val s3_mmio_req = RegNext(s2_mmio_req) val s3_pdest = RegNext(Mux(s2_valid, s2_out.uop.pdest, s2_mmio_req.bits.uop.pdest)) val s3_rfWen = RegEnable(Mux(s2_valid, s2_out.uop.rfWen, s2_mmio_req.bits.uop.rfWen), s2_valid || s2_mmio_req.valid) @@ -1749,12 +1748,14 @@ class LoadUnit(implicit p: Parameters) extends XSModule // FIXME: add 1 cycle delay ? 
// io.lsq.uncache.ready := !s3_valid + val s3_ldout_valid = s3_mmio_req.valid || + s3_out.valid && RegNext(!s2_out.isvec && !s2_out.isFrmMisAlignBuf) val s3_outexception = ExceptionNO.selectByFu(s3_out.bits.uop.exceptionVec, LduCfg).asUInt.orR && s3_vecActive io.ldout.valid := s3_ldout_valid io.ldout.bits := s3_ld_wb_meta io.ldout.bits.data := Mux(s3_valid, s3_ld_data_frm_pipe, s3_ld_data_frm_mmio) - io.ldout.bits.uop.rfWen := !io.ldCancel.ld2Cancel && s3_rfWen - io.ldout.bits.uop.fpWen := !io.ldCancel.ld2Cancel && s3_fpWen + io.ldout.bits.uop.rfWen := s3_rfWen + io.ldout.bits.uop.fpWen := s3_fpWen io.ldout.bits.uop.pdest := s3_pdest io.ldout.bits.uop.exceptionVec := ExceptionNO.selectByFu(s3_ld_wb_meta.uop.exceptionVec, LduCfg) io.ldout.bits.isFromLoadUnit := true.B