| program | R0 | halt | cycles | verdict |
|---|---|---|---|---|
| load_add_halt | r0=6 (want 8) | halted@pc=3 | 7 cyc | FAIL |
| addi_negative_halt | r0=-4 (want 5) | halted@pc=3 | 7 cyc | FAIL |
| mov_chain | r0=3 (want 3) | halted@pc=3 | 7 cyc | PASS |
| branch_loop | r0=-2 (want -2) | halted@pc=4 | 9 cyc | PASS |
(no automated diagnostics fired)
// mcu3: 3-stage (fetch / execute / writeback) 8-bit micro-controller core.
//
// Instruction byte layout as decoded by this module:
//   [7:6]=00  ADDI rd, imm4   R[rd] <- R[rd] + sign_extend(imm4)   rd=[5:4], imm4=[3:0]
//   [7:6]=01  MOV  rd, rs     R[rd] <- R[rs]                       rd=[5:4], rs=[3:2]
//   [7:6]=10  BRcc off4       cc=[5:4]: 00 always, 01 R0==0, 10 R0<0, 11 R0>0
//   [7:6]=11  HALT
//
// Parameters:
//   IMEM_INIT  32-byte instruction memory image; byte 0 occupies bits [0:7]
//              (the left-most / most-significant byte of the literal).
// Ports:
//   clk       rising-edge clock.
//   rst       synchronous, active-high reset.
//   pc_out    registered copy of the fetch address.
//   halt_out  goes high (and stays high until reset) after a HALT retires.
//   r0_out    registered copy of register R0.
//
// Pipeline behaviour:
//   * One instruction enters the execute stage per cycle; there are no stalls.
//   * A result is written to the register file one cycle after it is
//     computed, so the execute stage forwards the value held in the
//     writeback register to the immediately following instruction
//     (operand reads and the R0 branch test).
//   * A taken branch annuls the instruction that was fetched behind it
//     (one bubble cycle). A not-taken branch costs nothing.
//   * Branch target = address of the branch + 1 + sign_extend(off4).
//     NOTE(review): offset base follows the original "pc + off + 1"
//     expression; confirm against the ISA specification.
//   * Once HALT reaches the execute stage nothing behind it executes and the
//     fetch address stops advancing (pc_out settles at HALT address + 1).
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0
) (
    input  wire              clk,
    input  wire              rst,
    output reg         [4:0] pc_out,
    output reg               halt_out,
    output reg signed  [7:0] r0_out
);
    // Opcode field values (instr[7:6]).
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_MOV  = 2'b01;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Branch condition field values (instr[5:4] of a BRcc).
    localparam [1:0] CC_ALWAYS = 2'b00;
    localparam [1:0] CC_ZERO   = 2'b01;
    localparam [1:0] CC_NEG    = 2'b10;

    // Register file: 4 registers, 8-bit signed.
    reg signed [7:0] rf [0:3];
    integer i;

    // ---------------- Stage F: fetch ----------------
    reg  [4:0] pc_f;                                   // address fetched this cycle
    wire [7:0] instr_f = IMEM_INIT[pc_f*8 +: 8];       // byte 0 is at the MSB end

    // ---------------- F/X pipeline register ----------------
    reg        valid_x;                                // 0 = bubble (reset, annulled, post-HALT)
    reg  [4:0] pc_x;                                   // address of the instruction in X
    reg  [7:0] instr_x;

    // ---------------- X/W pipeline register ----------------
    reg              is_write_w;                       // data_w must be committed to rf[rd_w]
    reg              is_halt_w;                        // sticky: a HALT has passed through X
    reg        [1:0] rd_w;
    reg signed [7:0] data_w;

    // ---------------- Stage X: decode ----------------
    wire [1:0] op_x  = instr_x[7:6];
    wire [1:0] rd_x  = instr_x[5:4];                   // destination, or condition code for BRcc
    wire [1:0] rs_x  = instr_x[3:2];
    wire [3:0] imm_x = instr_x[3:0];

    // ADDI reads its own destination register; MOV reads rs.
    wire [1:0] src_x = (op_x == OP_ADDI) ? rd_x : rs_x;

    // Forwarding: the only result not yet visible in rf is the one sitting in
    // the W register (it is written at the coming clock edge).
    wire signed [7:0] src_val_x = (is_write_w && (rd_w == src_x)) ? data_w : rf[src_x];
    wire signed [7:0] r0_val_x  = (is_write_w && (rd_w == 2'd0))  ? data_w : rf[0];

    // ---------------- Stage X: execute ----------------
    wire signed [7:0] imm8_x = {{4{imm_x[3]}}, imm_x}; // sign-extended immediate
    wire signed [7:0] alu_x  = (op_x == OP_ADDI) ? (src_val_x + imm8_x) : src_val_x;

    wire r0_is_zero = (r0_val_x == 8'sd0);
    wire r0_is_neg  = r0_val_x[7];

    reg cond_met_x;
    always @(*) begin
        case (rd_x)
            CC_ALWAYS: cond_met_x = 1'b1;
            CC_ZERO:   cond_met_x = r0_is_zero;
            CC_NEG:    cond_met_x = r0_is_neg;
            default:   cond_met_x = !r0_is_neg && !r0_is_zero;   // R0 > 0
        endcase
    end

    wire       write_x       = valid_x && ((op_x == OP_ADDI) || (op_x == OP_MOV));
    wire       halt_x        = valid_x && (op_x == OP_HALT);
    wire       take_branch_x = valid_x && (op_x == OP_BR) && cond_met_x;
    wire [4:0] target_x      = pc_x + 5'd1 + {imm_x[3], imm_x};   // 5-bit wrap-around

    always @(posedge clk) begin
        if (rst) begin
            pc_f       <= 5'd0;
            valid_x    <= 1'b0;
            pc_x       <= 5'd0;
            instr_x    <= 8'h00;
            is_write_w <= 1'b0;
            is_halt_w  <= 1'b0;
            rd_w       <= 2'd0;
            data_w     <= 8'sd0;
            for (i = 0; i < 4; i = i + 1)
                rf[i] <= 8'sd0;
            pc_out     <= 5'd0;
            halt_out   <= 1'b0;
            r0_out     <= 8'sd0;
        end else begin
            // Observation outputs (registered).
            pc_out <= pc_f;
            r0_out <= rf[0];
            if (is_halt_w)
                halt_out <= 1'b1;          // sticky until reset

            // Stage W: commit the previous instruction's result.
            if (is_write_w)
                rf[rd_w] <= data_w;

            // X -> W.
            is_write_w <= write_x;
            rd_w       <= rd_x;
            data_w     <= alu_x;
            if (halt_x)
                is_halt_w <= 1'b1;

            // F -> X and next fetch address.
            if (halt_x || is_halt_w) begin
                // Nothing fetched behind a HALT may execute; pc_f is held.
                valid_x <= 1'b0;
            end else begin
                // A taken branch annuls the instruction fetched in its shadow.
                valid_x <= !take_branch_x;
                pc_x    <= pc_f;
                instr_x <= instr_f;
                pc_f    <= take_branch_x ? target_x : (pc_f + 5'd1);
            end
        end
    end
endmodule
```verilog
// mcu3: 3-stage (fetch / execute / writeback) 8-bit micro-controller core.
//
// Instruction byte layout as decoded by this module:
//   [7:6]=00  ADDI rd, imm4   R[rd] <- R[rd] + sign_extend(imm4)   rd=[5:4], imm4=[3:0]
//   [7:6]=01  MOV  rd, rs     R[rd] <- R[rs]                       rd=[5:4], rs=[3:2]
//   [7:6]=10  BRcc off4       cc=[5:4]: 00 always, 01 R0==0, 10 R0<0, 11 R0>0
//   [7:6]=11  HALT
//
// Branch convention: a taken branch annuls the single instruction that was
// fetched behind it, so it never changes architectural state but still costs
// one cycle. Execution continues at the branch target if taken, or at the
// next sequential instruction if not taken (no penalty).
// Branch target = address of the branch + 1 + sign_extend(off4).
// NOTE(review): offset base follows the original "pc + off + 1" expression;
// confirm against the ISA specification.
//
// Forwarding: a result is committed to the register file one cycle after it
// is computed, so the execute stage forwards the value held in the writeback
// register to the immediately following instruction (operand reads and the
// R0 branch test). Back-to-back ADDI/MOV/BRcc therefore run without stalls.
//
// HALT: once HALT reaches the execute stage nothing behind it executes and
// the fetch address stops advancing (pc_out settles at HALT address + 1);
// halt_out then goes high and stays high until reset.
//
// Parameters:
//   IMEM_INIT  32-byte instruction memory image; byte 0 occupies bits [0:7]
//              (the left-most / most-significant byte of the literal).
// Ports:
//   clk       rising-edge clock.
//   rst       synchronous, active-high reset.
//   pc_out    registered copy of the fetch address.
//   halt_out  sticky halt flag.
//   r0_out    registered copy of register R0.
`default_nettype none
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0
) (
    input  wire              clk,
    input  wire              rst,
    output reg         [4:0] pc_out,
    output reg               halt_out,
    output reg signed  [7:0] r0_out
);
    // Opcode field values (instr[7:6]).
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_MOV  = 2'b01;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Branch condition field values (instr[5:4] of a BRcc).
    localparam [1:0] CC_ALWAYS = 2'b00;
    localparam [1:0] CC_ZERO   = 2'b01;
    localparam [1:0] CC_NEG    = 2'b10;

    // Register file: 4 registers, 8-bit signed.
    reg signed [7:0] rf [0:3];
    integer i;

    // ---------------- Stage F: fetch ----------------
    reg  [4:0] pc_f;                                   // address fetched this cycle
    wire [7:0] instr_f = IMEM_INIT[pc_f*8 +: 8];       // byte 0 is at the MSB end

    // ---------------- F/X pipeline register ----------------
    reg        valid_x;                                // 0 = bubble (reset, annulled, post-HALT)
    reg  [4:0] pc_x;                                   // address of the instruction in X
    reg  [7:0] instr_x;

    // ---------------- X/W pipeline register ----------------
    reg              is_write_w;                       // data_w must be committed to rf[rd_w]
    reg              is_halt_w;                        // sticky: a HALT has passed through X
    reg        [1:0] rd_w;
    reg signed [7:0] data_w;

    // ---------------- Stage X: decode ----------------
    wire [1:0] op_x  = instr_x[7:6];
    wire [1:0] rd_x  = instr_x[5:4];                   // destination, or condition code for BRcc
    wire [1:0] rs_x  = instr_x[3:2];
    wire [3:0] imm_x = instr_x[3:0];

    // ADDI reads its own destination register; MOV reads rs.
    wire [1:0] src_x = (op_x == OP_ADDI) ? rd_x : rs_x;

    // Forwarding: the only result not yet visible in rf is the one sitting in
    // the W register (it is written at the coming clock edge).
    wire signed [7:0] src_val_x = (is_write_w && (rd_w == src_x)) ? data_w : rf[src_x];
    wire signed [7:0] r0_val_x  = (is_write_w && (rd_w == 2'd0))  ? data_w : rf[0];

    // ---------------- Stage X: execute ----------------
    wire signed [7:0] imm8_x = {{4{imm_x[3]}}, imm_x}; // sign-extended immediate
    wire signed [7:0] alu_x  = (op_x == OP_ADDI) ? (src_val_x + imm8_x) : src_val_x;

    wire r0_is_zero = (r0_val_x == 8'sd0);
    wire r0_is_neg  = r0_val_x[7];

    reg cond_met_x;
    always @(*) begin
        case (rd_x)
            CC_ALWAYS: cond_met_x = 1'b1;
            CC_ZERO:   cond_met_x = r0_is_zero;
            CC_NEG:    cond_met_x = r0_is_neg;
            default:   cond_met_x = !r0_is_neg && !r0_is_zero;   // R0 > 0
        endcase
    end

    wire       write_x       = valid_x && ((op_x == OP_ADDI) || (op_x == OP_MOV));
    wire       halt_x        = valid_x && (op_x == OP_HALT);
    wire       take_branch_x = valid_x && (op_x == OP_BR) && cond_met_x;
    wire [4:0] target_x      = pc_x + 5'd1 + {imm_x[3], imm_x};   // 5-bit wrap-around

    always @(posedge clk) begin
        if (rst) begin
            pc_f       <= 5'd0;
            valid_x    <= 1'b0;
            pc_x       <= 5'd0;
            instr_x    <= 8'h00;
            is_write_w <= 1'b0;
            is_halt_w  <= 1'b0;
            rd_w       <= 2'd0;
            data_w     <= 8'sd0;
            for (i = 0; i < 4; i = i + 1)
                rf[i] <= 8'sd0;
            pc_out     <= 5'd0;
            halt_out   <= 1'b0;
            r0_out     <= 8'sd0;
        end else begin
            // Observation outputs (registered).
            pc_out <= pc_f;
            r0_out <= rf[0];
            if (is_halt_w)
                halt_out <= 1'b1;          // sticky until reset

            // Stage W: commit the previous instruction's result.
            if (is_write_w)
                rf[rd_w] <= data_w;

            // X -> W.
            is_write_w <= write_x;
            rd_w       <= rd_x;
            data_w     <= alu_x;
            if (halt_x)
                is_halt_w <= 1'b1;

            // F -> X and next fetch address.
            if (halt_x || is_halt_w) begin
                // Nothing fetched behind a HALT may execute; pc_f is held.
                valid_x <= 1'b0;
            end else begin
                // A taken branch annuls the instruction fetched in its shadow.
                valid_x <= !take_branch_x;
                pc_x    <= pc_f;
                instr_x <= instr_f;
                pc_f    <= take_branch_x ? target_x : (pc_f + 5'd1);
            end
        end
    end
endmodule
`default_nettype wire
```