no functional run (did not compile)
(no automated diagnostics fired)
// mcu3 -- 3-stage pipelined 8-bit microcontroller (Fetch -> Execute -> Writeback).
//
// Instruction set (one byte per instruction, opcode in bits [7:6]):
//   00 ADDI rd, imm4   [5:4]=rd [3:0]=imm4   R[rd] <= R[rd] + sext(imm4)
//   01 MOV  rd, rs     [5:4]=rd [3:2]=rs     R[rd] <= R[rs]
//   10 BRcc off4       [5:4]=cc [3:0]=off4   if cc(R0): PC <= PC(branch) + 1 + sext(off4)
//                      cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 HALT            stop the machine; halt_out stays high until reset
//
// Pipeline:
//   F  reads the instruction byte addressed by pc_f.
//   X  decodes, reads its operand (forwarded from W when W is about to write
//      the same register), computes the result and resolves branches.
//   W  commits the result to the register file.
// Because W commits on the clock edge that ends the cycle, the only value not
// yet visible in the register file is the one sitting in W, so a single W->X
// forwarding path removes every read-after-write hazard without stalling.
//
// Branch convention: one delay slot. A branch is resolved in X while the next
// sequential instruction is being fetched; that instruction always executes
// and its register write is committed, whether or not the branch is taken.
// When the branch is taken, the fetch after the delay slot is the target.
//
// Outputs (all registered):
//   pc_out    address of the most recently fetched instruction (the one now
//             in X); after HALT it stays at the HALT instruction's address.
//   halt_out  rises on the edge at which HALT leaves X. The instruction in
//             front of HALT commits on that same edge, so r0_out is final
//             when halt_out is first seen high.
//   r0_out    mirrors the committed value of R0.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,       // synchronous, active-high
    output reg        [4:0] pc_out,    // current fetched PC (visible for testing)
    output reg              halt_out,  // asserted forever after HALT executes
    output reg signed [7:0] r0_out     // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00,
                     OP_MOV  = 2'b01,
                     OP_BR   = 2'b10,
                     OP_HALT = 2'b11;

    localparam [1:0] CC_ALWAYS = 2'b00,
                     CC_ZERO   = 2'b01,
                     CC_NEG    = 2'b10;   // 2'b11 = positive (default arm below)

    // Architectural state: four signed 8-bit registers.
    reg signed [7:0] rf [0:3];
    integer          i;                   // reset loop index only

    // F stage: address of the instruction being fetched this cycle.
    reg [4:0] pc_f;

    // F/X pipeline register.
    reg       fx_valid;                   // 0 = bubble (after reset)
    reg [4:0] fx_pc;
    reg [7:0] fx_instr;

    // X/W pipeline register.
    reg              xw_wen;              // instruction in W writes rf
    reg        [1:0] xw_rd;
    reg signed [7:0] xw_result;

    // ---------------------------------------------------------------- Fetch
    // IMEM_INIT is declared [0:255], so byte k occupies bits [8k : 8k+7].
    wire [7:0] f_instr = IMEM_INIT[{pc_f, 3'b000} +: 8];

    // -------------------------------------------------------- Decode (X)
    wire [1:0] x_op  = fx_instr[7:6];
    wire [1:0] x_rd  = fx_instr[5:4];     // destination, or cc for BRcc
    wire [1:0] x_rs  = fx_instr[3:2];
    wire [3:0] x_imm = fx_instr[3:0];

    wire x_is_addi = fx_valid && (x_op == OP_ADDI);
    wire x_is_mov  = fx_valid && (x_op == OP_MOV);
    wire x_is_br   = fx_valid && (x_op == OP_BR);
    wire x_is_halt = fx_valid && (x_op == OP_HALT);

    // ----------------------------------------- Operand read + forwarding
    // ADDI reads its own destination; MOV reads rs.
    wire [1:0] x_src = (x_op == OP_MOV) ? x_rs : x_rd;

    wire signed [7:0] x_src_val =
        (xw_wen && (xw_rd == x_src)) ? xw_result : rf[x_src];

    // Branch conditions always test R0.
    wire signed [7:0] x_r0_val =
        (xw_wen && (xw_rd == 2'd0)) ? xw_result : rf[0];

    // ------------------------------------------------------------ Execute
    wire signed [7:0] x_imm_sx = {{4{x_imm[3]}}, x_imm};
    wire signed [7:0] x_result =
        (x_op == OP_ADDI) ? (x_src_val + x_imm_sx) : x_src_val;

    reg x_cond;
    always @* begin
        case (x_rd)
            CC_ALWAYS: x_cond = 1'b1;
            CC_ZERO:   x_cond = (x_r0_val == 8'sd0);
            CC_NEG:    x_cond = x_r0_val[7];
            // Strictly positive. Tested on the sign bit so the comparison
            // cannot silently become unsigned.
            default:   x_cond = !x_r0_val[7] && (x_r0_val != 8'sd0);
        endcase
    end

    wire       x_taken  = x_is_br && x_cond;
    wire [4:0] x_target = fx_pc + 5'd1 + {x_imm[3], x_imm}; // wraps modulo 32

    // ------------------------------------------------------- State update
    always @(posedge clk) begin
        if (rst) begin
            pc_f      <= 5'd0;
            fx_valid  <= 1'b0;
            fx_pc     <= 5'd0;
            fx_instr  <= 8'h00;
            xw_wen    <= 1'b0;
            xw_rd     <= 2'd0;
            xw_result <= 8'sd0;
            pc_out    <= 5'd0;
            halt_out  <= 1'b0;
            r0_out    <= 8'sd0;
            for (i = 0; i < 4; i = i + 1)
                rf[i] <= 8'sd0;
        end else if (!halt_out) begin
            // W: commit. This also runs on the edge where HALT is in X, so
            // the instruction ahead of HALT is never lost.
            if (xw_wen) begin
                rf[xw_rd] <= xw_result;
                if (xw_rd == 2'd0)
                    r0_out <= xw_result;
            end

            if (x_is_halt) begin
                // Stop: drop the byte being fetched and drain the pipeline.
                halt_out <= 1'b1;
                fx_valid <= 1'b0;
                xw_wen   <= 1'b0;
            end else begin
                // X -> W
                xw_wen    <= x_is_addi || x_is_mov;
                xw_rd     <= x_rd;
                xw_result <= x_result;

                // F -> X. When X holds a taken branch, the byte fetched now
                // is its delay slot and still enters the pipeline.
                fx_valid <= 1'b1;
                fx_pc    <= pc_f;
                fx_instr <= f_instr;
                pc_out   <= pc_f;

                pc_f <= x_taken ? x_target : (pc_f + 5'd1);
            end
        end
    end
endmodule
// Branch convention: one branch delay slot. The instruction immediately
// following a BRcc is always fetched and executed, and its register write is
// committed normally whether or not the branch is taken. When the branch is
// taken, the first fetch after the delay slot comes from the branch target;
// otherwise fetching simply continues sequentially.
// mcu3 -- 3-stage pipelined 8-bit microcontroller (Fetch -> Execute -> Writeback).
//
// Instruction set (one byte per instruction, opcode in bits [7:6]):
//   00 ADDI rd, imm4   [5:4]=rd [3:0]=imm4   R[rd] <= R[rd] + sext(imm4)
//   01 MOV  rd, rs     [5:4]=rd [3:2]=rs     R[rd] <= R[rs]
//   10 BRcc off4       [5:4]=cc [3:0]=off4   if cc(R0): PC <= PC(branch) + 1 + sext(off4)
//                      cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 HALT            stop the machine; halt_out stays high until reset
//
// Pipeline:
//   F  reads the instruction byte addressed by pc_f.
//   X  decodes, reads its operand (forwarded from W when W is about to write
//      the same register), computes the result and resolves branches.
//   W  commits the result to the register file.
// Because W commits on the clock edge that ends the cycle, the only value not
// yet visible in the register file is the one sitting in W, so a single W->X
// forwarding path removes every read-after-write hazard without stalling.
//
// The instruction fetched while a branch is in X (its delay slot) always
// executes and commits; a taken branch redirects the fetch that follows it.
//
// Outputs (all registered):
//   pc_out    address of the most recently fetched instruction (the one now
//             in X); after HALT it stays at the HALT instruction's address.
//   halt_out  rises on the edge at which HALT leaves X. The instruction in
//             front of HALT commits on that same edge, so r0_out is final
//             when halt_out is first seen high.
//   r0_out    mirrors the committed value of R0.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,       // synchronous, active-high
    output reg        [4:0] pc_out,    // current fetched PC (visible for testing)
    output reg              halt_out,  // asserted forever after HALT executes
    output reg signed [7:0] r0_out     // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00,
                     OP_MOV  = 2'b01,
                     OP_BR   = 2'b10,
                     OP_HALT = 2'b11;

    localparam [1:0] CC_ALWAYS = 2'b00,
                     CC_ZERO   = 2'b01,
                     CC_NEG    = 2'b10;   // 2'b11 = positive (default arm below)

    // Architectural state: four signed 8-bit registers.
    reg signed [7:0] rf [0:3];
    integer          i;                   // reset loop index only

    // F stage: address of the instruction being fetched this cycle.
    reg [4:0] pc_f;

    // F/X pipeline register.
    reg       fx_valid;                   // 0 = bubble (after reset)
    reg [4:0] fx_pc;
    reg [7:0] fx_instr;

    // X/W pipeline register.
    reg              xw_wen;              // instruction in W writes rf
    reg        [1:0] xw_rd;
    reg signed [7:0] xw_result;

    // ---------------------------------------------------------------- Fetch
    // IMEM_INIT is declared [0:255], so byte k occupies bits [8k : 8k+7].
    wire [7:0] f_instr = IMEM_INIT[{pc_f, 3'b000} +: 8];

    // -------------------------------------------------------- Decode (X)
    wire [1:0] x_op  = fx_instr[7:6];
    wire [1:0] x_rd  = fx_instr[5:4];     // destination, or cc for BRcc
    wire [1:0] x_rs  = fx_instr[3:2];
    wire [3:0] x_imm = fx_instr[3:0];

    wire x_is_addi = fx_valid && (x_op == OP_ADDI);
    wire x_is_mov  = fx_valid && (x_op == OP_MOV);
    wire x_is_br   = fx_valid && (x_op == OP_BR);
    wire x_is_halt = fx_valid && (x_op == OP_HALT);

    // ----------------------------------------- Operand read + forwarding
    // ADDI reads its own destination; MOV reads rs.
    wire [1:0] x_src = (x_op == OP_MOV) ? x_rs : x_rd;

    wire signed [7:0] x_src_val =
        (xw_wen && (xw_rd == x_src)) ? xw_result : rf[x_src];

    // Branch conditions always test R0.
    wire signed [7:0] x_r0_val =
        (xw_wen && (xw_rd == 2'd0)) ? xw_result : rf[0];

    // ------------------------------------------------------------ Execute
    wire signed [7:0] x_imm_sx = {{4{x_imm[3]}}, x_imm};
    wire signed [7:0] x_result =
        (x_op == OP_ADDI) ? (x_src_val + x_imm_sx) : x_src_val;

    reg x_cond;
    always @* begin
        case (x_rd)
            CC_ALWAYS: x_cond = 1'b1;
            CC_ZERO:   x_cond = (x_r0_val == 8'sd0);
            CC_NEG:    x_cond = x_r0_val[7];
            // Strictly positive. Tested on the sign bit so the comparison
            // cannot silently become unsigned.
            default:   x_cond = !x_r0_val[7] && (x_r0_val != 8'sd0);
        endcase
    end

    wire       x_taken  = x_is_br && x_cond;
    wire [4:0] x_target = fx_pc + 5'd1 + {x_imm[3], x_imm}; // wraps modulo 32

    // ------------------------------------------------------- State update
    always @(posedge clk) begin
        if (rst) begin
            pc_f      <= 5'd0;
            fx_valid  <= 1'b0;
            fx_pc     <= 5'd0;
            fx_instr  <= 8'h00;
            xw_wen    <= 1'b0;
            xw_rd     <= 2'd0;
            xw_result <= 8'sd0;
            pc_out    <= 5'd0;
            halt_out  <= 1'b0;
            r0_out    <= 8'sd0;
            for (i = 0; i < 4; i = i + 1)
                rf[i] <= 8'sd0;
        end else if (!halt_out) begin
            // W: commit. This also runs on the edge where HALT is in X, so
            // the instruction ahead of HALT is never lost.
            if (xw_wen) begin
                rf[xw_rd] <= xw_result;
                if (xw_rd == 2'd0)
                    r0_out <= xw_result;
            end

            if (x_is_halt) begin
                // Stop: drop the byte being fetched and drain the pipeline.
                halt_out <= 1'b1;
                fx_valid <= 1'b0;
                xw_wen   <= 1'b0;
            end else begin
                // X -> W
                xw_wen    <= x_is_addi || x_is_mov;
                xw_rd     <= x_rd;
                xw_result <= x_result;

                // F -> X. When X holds a taken branch, the byte fetched now
                // is its delay slot and still enters the pipeline.
                fx_valid <= 1'b1;
                fx_pc    <= pc_f;
                fx_instr <= f_instr;
                pc_out   <= pc_f;

                pc_f <= x_taken ? x_target : (pc_f + 5'd1);
            end
        end
    end
endmodule
```