no functional run (did not compile)
(no automated diagnostics fired)
// mcu3: 3-stage (Fetch / Execute / Writeback) 8-bit microcontroller core.
//
// Instruction encoding (one byte per instruction):
//   [7:6]=00  ADDI  rd=[5:4], imm=[3:0]   rd <- rd + sign_ext(imm)
//   [7:6]=01  MOV   rd=[5:4], rs=[3:2]    rd <- rs
//   [7:6]=10  BRcc  cc=[5:4], off=[3:0]   if cc(R0): PC <- PC + 1 + sign_ext(off)
//                   cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   [7:6]=11  HALT
//
// NOTE(review): ADDI is implemented as "rd <- rd + imm". The encoding has no
// source-register field, so rd is assumed to be the source -- confirm
// against the ISA specification.
//
// Branch convention: a branch is resolved in Execute while the next
// sequential instruction is already in Fetch, so the instruction immediately
// following a branch always executes (one branch delay slot). The redirected
// PC is used for the fetch after the delay slot.
//
// Hazards: the register file is written at the end of Writeback, so the only
// in-flight result an Execute-stage instruction can miss is the one currently
// in Writeback; that value is forwarded. No stalls are ever required.
//
// HALT takes effect when it reaches Writeback: halt_out is set and all state
// freezes, so younger instructions already in the pipeline never commit.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    // Opcodes (instr[7:6])
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_MOV  = 2'b01;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Branch condition codes (instr[5:4] of BRcc)
    localparam [1:0] CC_ALWAYS = 2'b00;
    localparam [1:0] CC_ZERO   = 2'b01;
    localparam [1:0] CC_NEG    = 2'b10;
    // 2'b11 (positive) is handled by the default arm of the condition case.

    // Register file: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];

    // ------------------------------------------------------------------
    // Fetch stage: pc_f addresses the parameter ROM combinationally.
    // IMEM_INIT is declared [0:255], so byte k occupies bits [8k : 8k+7].
    // ------------------------------------------------------------------
    reg  [4:0] pc_f;
    wire [7:0] instr_f = IMEM_INIT[{pc_f, 3'b000} +: 8];

    // ------------------------------------------------------------------
    // F/X pipeline register. valid_x is low only for the reset bubble.
    // ------------------------------------------------------------------
    reg       valid_x;
    reg [4:0] pc_x;
    reg [7:0] instr_x;

    // ------------------------------------------------------------------
    // X/W pipeline register
    // ------------------------------------------------------------------
    reg              wen_w;     // instruction in W writes rf[rd_w]
    reg        [1:0] rd_w;
    reg signed [7:0] alu_res_w;
    reg              is_halt_w;

    // ------------------------------------------------------------------
    // Execute stage: decode
    // ------------------------------------------------------------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] cond_x   = instr_x[5:4];
    wire [3:0] imm_x    = instr_x[3:0];
    wire [3:0] off_x    = instr_x[3:0];

    wire is_addi_x   = valid_x && (opcode_x == OP_ADDI);
    wire is_mov_x    = valid_x && (opcode_x == OP_MOV);
    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire wen_x       = is_addi_x || is_mov_x;

    // Source register: MOV reads rs, ADDI reads its own destination.
    wire [1:0] src_x = is_mov_x ? instr_x[3:2] : rd_x;

    // ------------------------------------------------------------------
    // Execute stage: operand read with W -> X forwarding.
    // Only the Writeback-stage result is forwarded; forwarding X's own
    // result back into X would create a combinational loop.
    // ------------------------------------------------------------------
    wire              fwd_src_x = wen_w && (rd_w == src_x);
    wire signed [7:0] src_val_x = fwd_src_x ? alu_res_w : rf[src_x];

    wire              fwd_r0_x  = wen_w && (rd_w == 2'd0);
    wire signed [7:0] r0_val_x  = fwd_r0_x ? alu_res_w : rf[0];

    // ------------------------------------------------------------------
    // Execute stage: ALU (8-bit two's-complement, wraps on overflow)
    // ------------------------------------------------------------------
    wire signed [7:0] imm_sext  = {{4{imm_x[3]}}, imm_x};
    wire signed [7:0] alu_res_x = is_mov_x ? src_val_x : (src_val_x + imm_sext);

    // ------------------------------------------------------------------
    // Execute stage: branch target and condition.
    // The sign test uses the MSB directly; comparing a signed value with an
    // unsized/unsigned literal would silently become an unsigned compare.
    // ------------------------------------------------------------------
    wire [4:0] off_sext        = {off_x[3], off_x};
    wire [4:0] branch_target_x = pc_x + off_sext + 5'd1; // wraps modulo 32

    wire r0_is_zero = (r0_val_x == 8'sd0);
    wire r0_is_neg  = r0_val_x[7];

    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            CC_ALWAYS: cond_met_x = 1'b1;
            CC_ZERO:   cond_met_x = r0_is_zero;
            CC_NEG:    cond_met_x = r0_is_neg;
            default:   cond_met_x = !r0_is_neg && !r0_is_zero; // positive
        endcase
    end

    wire branch_taken_x = is_branch_x && cond_met_x;

    // pc_out mirrors the fetch PC so it always shows the address being fetched.
    always @(*) begin
        pc_out = pc_f;
    end

    // ------------------------------------------------------------------
    // Sequential state
    // ------------------------------------------------------------------
    always @(posedge clk) begin
        if (rst) begin
            pc_f      <= 5'd0;
            valid_x   <= 1'b0;
            pc_x      <= 5'd0;
            instr_x   <= 8'h00;
            wen_w     <= 1'b0;
            rd_w      <= 2'd0;
            alu_res_w <= 8'sd0;
            is_halt_w <= 1'b0;
            rf[0]     <= 8'sd0;
            rf[1]     <= 8'sd0;
            rf[2]     <= 8'sd0;
            rf[3]     <= 8'sd0;
            halt_out  <= 1'b0;
            r0_out    <= 8'sd0;
        end else if (!halt_out) begin
            // Writeback: commit the oldest instruction.
            if (is_halt_w) begin
                halt_out <= 1'b1; // everything freezes from the next cycle on
            end else if (wen_w) begin
                rf[rd_w] <= alu_res_w;
                if (rd_w == 2'd0) begin
                    r0_out <= alu_res_w; // keep r0_out identical to rf[0]
                end
            end

            // Execute -> Writeback
            wen_w     <= wen_x;
            rd_w      <= rd_x;
            alu_res_w <= alu_res_x;
            is_halt_w <= is_halt_x;

            // Fetch -> Execute (instruction and its own PC travel together)
            valid_x <= 1'b1;
            pc_x    <= pc_f;
            instr_x <= instr_f;

            // Next fetch address: a taken branch in X redirects the fetch
            // that follows the delay-slot instruction currently in F.
            if (branch_taken_x) begin
                pc_f <= branch_target_x;
            end else begin
                pc_f <= pc_f + 5'd1;
            end
        end
    end
endmodule
// mcu3: 3-stage (Fetch / Execute / Writeback) 8-bit microcontroller core.
//
// Instruction encoding (one byte per instruction):
//   [7:6]=00  ADDI  rd=[5:4], imm=[3:0]   rd <- rd + sign_ext(imm)
//   [7:6]=01  MOV   rd=[5:4], rs=[3:2]    rd <- rs
//   [7:6]=10  BRcc  cc=[5:4], off=[3:0]   if cc(R0): PC <- PC + 1 + sign_ext(off)
//                   cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   [7:6]=11  HALT
//
// NOTE(review): ADDI is implemented as "rd <- rd + imm". The encoding has no
// source-register field, so rd is assumed to be the source -- confirm
// against the ISA specification.
//
// Branch convention: a branch is resolved in Execute while the next
// sequential instruction is already in Fetch, so the instruction immediately
// following a branch always executes (one branch delay slot). The redirected
// PC is used for the fetch after the delay slot.
//
// Hazards: the register file is written at the end of Writeback, so the only
// in-flight result an Execute-stage instruction can miss is the one currently
// in Writeback; that value is forwarded. No stalls are ever required.
//
// HALT takes effect when it reaches Writeback: halt_out is set and all state
// freezes, so younger instructions already in the pipeline never commit.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    // Opcodes (instr[7:6])
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_MOV  = 2'b01;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Branch condition codes (instr[5:4] of BRcc)
    localparam [1:0] CC_ALWAYS = 2'b00;
    localparam [1:0] CC_ZERO   = 2'b01;
    localparam [1:0] CC_NEG    = 2'b10;
    // 2'b11 (positive) is handled by the default arm of the condition case.

    // Register file: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];

    // ------------------------------------------------------------------
    // Fetch stage: pc_f addresses the parameter ROM combinationally.
    // IMEM_INIT is declared [0:255], so byte k occupies bits [8k : 8k+7].
    // ------------------------------------------------------------------
    reg  [4:0] pc_f;
    wire [7:0] instr_f = IMEM_INIT[{pc_f, 3'b000} +: 8];

    // ------------------------------------------------------------------
    // F/X pipeline register. valid_x is low only for the reset bubble.
    // ------------------------------------------------------------------
    reg       valid_x;
    reg [4:0] pc_x;
    reg [7:0] instr_x;

    // ------------------------------------------------------------------
    // X/W pipeline register
    // ------------------------------------------------------------------
    reg              wen_w;     // instruction in W writes rf[rd_w]
    reg        [1:0] rd_w;
    reg signed [7:0] alu_res_w;
    reg              is_halt_w;

    // ------------------------------------------------------------------
    // Execute stage: decode
    // ------------------------------------------------------------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] cond_x   = instr_x[5:4];
    wire [3:0] imm_x    = instr_x[3:0];
    wire [3:0] off_x    = instr_x[3:0];

    wire is_addi_x   = valid_x && (opcode_x == OP_ADDI);
    wire is_mov_x    = valid_x && (opcode_x == OP_MOV);
    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire wen_x       = is_addi_x || is_mov_x;

    // Source register: MOV reads rs, ADDI reads its own destination.
    wire [1:0] src_x = is_mov_x ? instr_x[3:2] : rd_x;

    // ------------------------------------------------------------------
    // Execute stage: operand read with W -> X forwarding.
    // Only the Writeback-stage result is forwarded; forwarding X's own
    // result back into X would create a combinational loop.
    // ------------------------------------------------------------------
    wire              fwd_src_x = wen_w && (rd_w == src_x);
    wire signed [7:0] src_val_x = fwd_src_x ? alu_res_w : rf[src_x];

    wire              fwd_r0_x  = wen_w && (rd_w == 2'd0);
    wire signed [7:0] r0_val_x  = fwd_r0_x ? alu_res_w : rf[0];

    // ------------------------------------------------------------------
    // Execute stage: ALU (8-bit two's-complement, wraps on overflow)
    // ------------------------------------------------------------------
    wire signed [7:0] imm_sext  = {{4{imm_x[3]}}, imm_x};
    wire signed [7:0] alu_res_x = is_mov_x ? src_val_x : (src_val_x + imm_sext);

    // ------------------------------------------------------------------
    // Execute stage: branch target and condition.
    // The sign test uses the MSB directly; comparing a signed value with an
    // unsized/unsigned literal would silently become an unsigned compare.
    // ------------------------------------------------------------------
    wire [4:0] off_sext        = {off_x[3], off_x};
    wire [4:0] branch_target_x = pc_x + off_sext + 5'd1; // wraps modulo 32

    wire r0_is_zero = (r0_val_x == 8'sd0);
    wire r0_is_neg  = r0_val_x[7];

    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            CC_ALWAYS: cond_met_x = 1'b1;
            CC_ZERO:   cond_met_x = r0_is_zero;
            CC_NEG:    cond_met_x = r0_is_neg;
            default:   cond_met_x = !r0_is_neg && !r0_is_zero; // positive
        endcase
    end

    wire branch_taken_x = is_branch_x && cond_met_x;

    // pc_out mirrors the fetch PC so it always shows the address being fetched.
    always @(*) begin
        pc_out = pc_f;
    end

    // ------------------------------------------------------------------
    // Sequential state
    // ------------------------------------------------------------------
    always @(posedge clk) begin
        if (rst) begin
            pc_f      <= 5'd0;
            valid_x   <= 1'b0;
            pc_x      <= 5'd0;
            instr_x   <= 8'h00;
            wen_w     <= 1'b0;
            rd_w      <= 2'd0;
            alu_res_w <= 8'sd0;
            is_halt_w <= 1'b0;
            rf[0]     <= 8'sd0;
            rf[1]     <= 8'sd0;
            rf[2]     <= 8'sd0;
            rf[3]     <= 8'sd0;
            halt_out  <= 1'b0;
            r0_out    <= 8'sd0;
        end else if (!halt_out) begin
            // Writeback: commit the oldest instruction.
            if (is_halt_w) begin
                halt_out <= 1'b1; // everything freezes from the next cycle on
            end else if (wen_w) begin
                rf[rd_w] <= alu_res_w;
                if (rd_w == 2'd0) begin
                    r0_out <= alu_res_w; // keep r0_out identical to rf[0]
                end
            end

            // Execute -> Writeback
            wen_w     <= wen_x;
            rd_w      <= rd_x;
            alu_res_w <= alu_res_x;
            is_halt_w <= is_halt_x;

            // Fetch -> Execute (instruction and its own PC travel together)
            valid_x <= 1'b1;
            pc_x    <= pc_f;
            instr_x <= instr_f;

            // Next fetch address: a taken branch in X redirects the fetch
            // that follows the delay-slot instruction currently in F.
            if (branch_taken_x) begin
                pc_f <= branch_target_x;
            end else begin
                pc_f <= pc_f + 5'd1;
            end
        end
    end
endmodule
```