no functional run (did not compile)
// mcu3: minimal 3-stage (Fetch / Execute / Writeback) pipelined microcontroller.
//
// Instruction encoding (8 bits, opcode in bits [7:6]):
//   00 rd[2] imm[4]      ADDI  R[rd] <= R[rd] + sign_ext(imm)
//   01 rd[2] rs[2] xx    MOV   R[rd] <= R[rs]
//   10 cc[2] off[4]      BRcc  if cc(R0) then PC <= PC + 1 + sign_ext(off)
//                              cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 xxxxxx            HALT
//
// Branch convention: branches resolve in the Execute stage. The instruction
// immediately following a BRcc (the branch delay slot) is always fetched,
// executed and committed, whether or not the branch is taken. The first
// instruction fetched after the delay slot comes from the branch target when
// the branch is taken, or from the next sequential address otherwise.
//
// Halt convention: HALT takes effect on the clock edge at which it occupies
// the Execute stage. The instruction ahead of it commits on that same edge,
// the younger instruction in Fetch is discarded, and all state (including
// pc_out and r0_out) is frozen until reset.
// NOTE(review): the exact cycle on which halt_out must rise and the value
// pc_out must hold afterwards are not specified in this file - confirm
// against the testbench / ISA specification.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00,
                     OP_MOV  = 2'b01,
                     OP_BR   = 2'b10,
                     OP_HALT = 2'b11;

    // Instruction memory image. The vector is declared [0:255] (ascending),
    // so imem[8*pc +: 8] selects bits [8*pc : 8*pc+7], i.e. byte number `pc`
    // counted from the most-significant end.
    wire [0:255] imem = IMEM_INIT;

    // Register file: 4 registers, 8-bit signed. All four are writable;
    // r0_out is updated together with rf[0] so the two never diverge.
    reg signed [7:0] rf [0:3];

    // ---------------- Fetch ----------------
    // pc_out is the fetch program counter itself.
    wire [7:0] instr_f = imem[{pc_out, 3'b000} +: 8];

    // ---------------- F/X pipeline register ----------------
    reg       valid_x; // 0 while the Execute stage holds a bubble
    reg [7:0] instr_x;
    reg [4:0] pc_x;

    // ---------------- X/W pipeline register ----------------
    reg              wen_w;    // instruction in Writeback writes the RF
    reg        [1:0] rd_w;
    reg signed [7:0] result_w;

    // ---------------- Execute (combinational) ----------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] rs_x     = instr_x[3:2];
    wire [1:0] cond_x   = instr_x[5:4];

    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire wen_x       = valid_x && (opcode_x == OP_ADDI || opcode_x == OP_MOV);

    // ADDI reads its own destination register; MOV reads rs.
    wire [1:0] src_x = (opcode_x == OP_ADDI) ? rd_x : rs_x;

    // The only instruction that can still be in flight is the one in
    // Writeback, whose result reaches the register file on the coming clock
    // edge. Forward it when it targets the register being read.
    wire signed [7:0] src_val_x = (wen_w && rd_w == src_x) ? result_w : rf[src_x];
    wire signed [7:0] r0_val_x  = (wen_w && rd_w == 2'b00) ? result_w : rf[0];

    wire signed [7:0] imm_ext_x = {{4{instr_x[3]}}, instr_x[3:0]};
    wire signed [7:0] result_x  = (opcode_x == OP_ADDI) ? (src_val_x + imm_ext_x)
                                                        : src_val_x;

    // Branch condition, evaluated on the (forwarded) value of R0.
    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            2'b00:   cond_met_x = 1'b1;                  // always
            2'b01:   cond_met_x = (r0_val_x == 8'sd0);   // R0 == 0
            2'b10:   cond_met_x = (r0_val_x <  8'sd0);   // R0 <  0
            default: cond_met_x = (r0_val_x >  8'sd0);   // R0 >  0
        endcase
    end

    wire       branch_taken_x  = is_branch_x && cond_met_x;
    // Target = PC of the branch + 1 + sign-extended 4-bit offset (mod 32).
    wire [4:0] branch_target_x = pc_x + 5'd1 + {instr_x[3], instr_x[3:0]};

    // ---------------- Sequential state ----------------
    integer i;
    always @(posedge clk) begin
        if (rst) begin
            pc_out   <= 5'd0;
            halt_out <= 1'b0;
            r0_out   <= 8'sd0;
            valid_x  <= 1'b0;
            instr_x  <= 8'h00;
            pc_x     <= 5'd0;
            wen_w    <= 1'b0;
            rd_w     <= 2'b00;
            result_w <= 8'sd0;
            for (i = 0; i < 4; i = i + 1) begin
                rf[i] <= 8'sd0;
            end
        end else if (!halt_out) begin
            // Writeback: commit the oldest instruction.
            if (wen_w) begin
                rf[rd_w] <= result_w;
                if (rd_w == 2'b00) begin
                    r0_out <= result_w;
                end
            end

            // Execute -> Writeback. wen_x is 0 for BRcc, HALT and bubbles.
            wen_w    <= wen_x;
            rd_w     <= rd_x;
            result_w <= result_x;

            if (is_halt_x) begin
                // Stop the machine: drop the instruction being fetched and
                // freeze the PC. wen_w was cleared above, so nothing further
                // is written once halt_out is set.
                halt_out <= 1'b1;
                valid_x  <= 1'b0;
            end else begin
                // Fetch -> Execute. When a taken branch is in Execute, the
                // instruction moving into Execute here is its delay slot.
                valid_x <= 1'b1;
                instr_x <= instr_f;
                pc_x    <= pc_out;
                pc_out  <= branch_taken_x ? branch_target_x : (pc_out + 5'd1);
            end
        end
    end
endmodule
```verilog
// mcu3: minimal 3-stage (Fetch / Execute / Writeback) pipelined microcontroller.
//
// Instruction encoding (8 bits, opcode in bits [7:6]):
//   00 rd[2] imm[4]      ADDI  R[rd] <= R[rd] + sign_ext(imm)
//   01 rd[2] rs[2] xx    MOV   R[rd] <= R[rs]
//   10 cc[2] off[4]      BRcc  if cc(R0) then PC <= PC + 1 + sign_ext(off)
//                              cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 xxxxxx            HALT
//
// Branch convention: branches resolve in the Execute stage. The instruction
// immediately following a BRcc (the branch delay slot) is always fetched,
// executed and committed, whether or not the branch is taken. The first
// instruction fetched after the delay slot comes from the branch target when
// the branch is taken, or from the next sequential address otherwise.
//
// Halt convention: HALT takes effect on the clock edge at which it occupies
// the Execute stage. The instruction ahead of it commits on that same edge,
// the younger instruction in Fetch is discarded, and all state (including
// pc_out and r0_out) is frozen until reset.
// NOTE(review): the exact cycle on which halt_out must rise and the value
// pc_out must hold afterwards are not specified in this file - confirm
// against the testbench / ISA specification.
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00,
                     OP_MOV  = 2'b01,
                     OP_BR   = 2'b10,
                     OP_HALT = 2'b11;

    // Instruction memory image. The vector is declared [0:255] (ascending),
    // so imem[8*pc +: 8] selects bits [8*pc : 8*pc+7], i.e. byte number `pc`
    // counted from the most-significant end.
    wire [0:255] imem = IMEM_INIT;

    // Register file: 4 registers, 8-bit signed. All four are writable;
    // r0_out is updated together with rf[0] so the two never diverge.
    reg signed [7:0] rf [0:3];

    // ---------------- Fetch ----------------
    // pc_out is the fetch program counter itself.
    wire [7:0] instr_f = imem[{pc_out, 3'b000} +: 8];

    // ---------------- F/X pipeline register ----------------
    reg       valid_x; // 0 while the Execute stage holds a bubble
    reg [7:0] instr_x;
    reg [4:0] pc_x;

    // ---------------- X/W pipeline register ----------------
    reg              wen_w;    // instruction in Writeback writes the RF
    reg        [1:0] rd_w;
    reg signed [7:0] result_w;

    // ---------------- Execute (combinational) ----------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] rs_x     = instr_x[3:2];
    wire [1:0] cond_x   = instr_x[5:4];

    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire wen_x       = valid_x && (opcode_x == OP_ADDI || opcode_x == OP_MOV);

    // ADDI reads its own destination register; MOV reads rs.
    wire [1:0] src_x = (opcode_x == OP_ADDI) ? rd_x : rs_x;

    // The only instruction that can still be in flight is the one in
    // Writeback, whose result reaches the register file on the coming clock
    // edge. Forward it when it targets the register being read.
    wire signed [7:0] src_val_x = (wen_w && rd_w == src_x) ? result_w : rf[src_x];
    wire signed [7:0] r0_val_x  = (wen_w && rd_w == 2'b00) ? result_w : rf[0];

    wire signed [7:0] imm_ext_x = {{4{instr_x[3]}}, instr_x[3:0]};
    wire signed [7:0] result_x  = (opcode_x == OP_ADDI) ? (src_val_x + imm_ext_x)
                                                        : src_val_x;

    // Branch condition, evaluated on the (forwarded) value of R0.
    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            2'b00:   cond_met_x = 1'b1;                  // always
            2'b01:   cond_met_x = (r0_val_x == 8'sd0);   // R0 == 0
            2'b10:   cond_met_x = (r0_val_x <  8'sd0);   // R0 <  0
            default: cond_met_x = (r0_val_x >  8'sd0);   // R0 >  0
        endcase
    end

    wire       branch_taken_x  = is_branch_x && cond_met_x;
    // Target = PC of the branch + 1 + sign-extended 4-bit offset (mod 32).
    wire [4:0] branch_target_x = pc_x + 5'd1 + {instr_x[3], instr_x[3:0]};

    // ---------------- Sequential state ----------------
    integer i;
    always @(posedge clk) begin
        if (rst) begin
            pc_out   <= 5'd0;
            halt_out <= 1'b0;
            r0_out   <= 8'sd0;
            valid_x  <= 1'b0;
            instr_x  <= 8'h00;
            pc_x     <= 5'd0;
            wen_w    <= 1'b0;
            rd_w     <= 2'b00;
            result_w <= 8'sd0;
            for (i = 0; i < 4; i = i + 1) begin
                rf[i] <= 8'sd0;
            end
        end else if (!halt_out) begin
            // Writeback: commit the oldest instruction.
            if (wen_w) begin
                rf[rd_w] <= result_w;
                if (rd_w == 2'b00) begin
                    r0_out <= result_w;
                end
            end

            // Execute -> Writeback. wen_x is 0 for BRcc, HALT and bubbles.
            wen_w    <= wen_x;
            rd_w     <= rd_x;
            result_w <= result_x;

            if (is_halt_x) begin
                // Stop the machine: drop the instruction being fetched and
                // freeze the PC. wen_w was cleared above, so nothing further
                // is written once halt_out is set.
                halt_out <= 1'b1;
                valid_x  <= 1'b0;
            end else begin
                // Fetch -> Execute. When a taken branch is in Execute, the
                // instruction moving into Execute here is its delay slot.
                valid_x <= 1'b1;
                instr_x <= instr_f;
                pc_x    <= pc_out;
                pc_out  <= branch_taken_x ? branch_target_x : (pc_out + 5'd1);
            end
        end
    end
endmodule
```