no functional run (did not compile)
(no automated diagnostics fired)
// mcu3: 3-stage (Fetch / Execute / Writeback) 8-bit micro-controller with a
// 32-byte instruction ROM supplied through the IMEM_INIT parameter.
//
// Instruction format (one byte, opcode in bits [7:6]):
//   00 rd  imm4    ADDI  R[rd] <- R[rd] + sign_extend(imm4)
//   01 rd  rs --   MOV   R[rd] <- R[rs]
//   10 cc  off4    BRcc  if cc(R0): PC <- PC_of_branch + 1 + sign_extend(off4)
//                        cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 ------      HALT
//
// Branch convention: a branch is resolved in X. The instruction already in F
// (the one following the branch) still executes, i.e. there is exactly one
// branch delay slot; the fetch PC is redirected on the next clock edge.
//
// Ports:
//   clk      - clock; all state updates on the rising edge.
//   rst      - synchronous, active-high reset; clears PC, pipeline, registers.
//   pc_out   - registered copy of the fetch PC (one clock behind pc_f).
//   halt_out - goes high one clock after HALT occupies W and then stays high
//              until reset; the whole pipeline is frozen while it is high.
//   r0_out   - updated on the same clock edge as R[0], so it always equals R[0].
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Instruction ROM. The range is ascending, so bits [0:7] are byte 0 (MSB).
    wire [0:255] imem = IMEM_INIT;

    // ---------------- Stage F (Fetch) ----------------
    reg  [4:0] pc_f;
    // Byte address pc_f maps to bits [pc_f*8 +: 8]; fetch is purely combinational.
    wire [7:0] instr_f = imem[{pc_f, 3'b000} +: 8];

    // ---------------- Stage X (Execute) ----------------
    // Only these three are state; every decoded field below is a wire derived
    // from instr_x, so each signal has exactly one driver.
    reg        valid_x;   // 0 = bubble (only right after reset)
    reg  [4:0] pc_x;
    reg  [7:0] instr_x;

    // ---------------- Stage W (Writeback) ----------------
    reg              wr_en_w;   // instruction in W writes R[rd_w]
    reg        [1:0] rd_w;
    reg signed [7:0] result_w;
    reg              halt_w;    // instruction in W is HALT

    // Register file
    reg signed [7:0] rf [0:3];

    // ---------------- X-stage decode ----------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] rs_x     = instr_x[3:2];
    wire [1:0] cond_x   = instr_x[5:4];
    wire [3:0] imm4_x   = instr_x[3:0];   // ADDI immediate / branch offset

    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire writes_rd_x = valid_x && !opcode_x[1];   // ADDI (00) or MOV (01)

    // ---------------- Operand read with W -> X bypass ----------------
    // The instruction in W commits to rf on the coming clock edge, so rf is
    // still stale for the instruction in X; take the value from W instead.
    wire signed [7:0] rd_val_x = (wr_en_w && (rd_w == rd_x)) ? result_w : rf[rd_x];
    wire signed [7:0] rs_val_x = (wr_en_w && (rd_w == rs_x)) ? result_w : rf[rs_x];
    wire signed [7:0] r0_val_x = (wr_en_w && (rd_w == 2'd0)) ? result_w : rf[0];

    // ---------------- ALU ----------------
    wire signed [7:0] imm_sext_x   = {{4{imm4_x[3]}}, imm4_x};
    wire signed [7:0] alu_result_x = (opcode_x == OP_ADDI) ? (rd_val_x + imm_sext_x)
                                                           : rs_val_x;

    // ---------------- Branch resolution ----------------
    wire r0_zero_x = (r0_val_x == 8'd0);
    wire r0_neg_x  = r0_val_x[7];          // sign bit of the two's-complement value

    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            2'b00:   cond_met_x = 1'b1;                       // always
            2'b01:   cond_met_x = r0_zero_x;                  // R0 == 0
            2'b10:   cond_met_x = r0_neg_x;                   // R0 <  0
            default: cond_met_x = !r0_neg_x && !r0_zero_x;    // R0 >  0
        endcase
    end

    // A branch that is younger than a HALT (HALT already in W) must not
    // redirect fetch.
    wire       branch_taken_x  = is_branch_x && cond_met_x && !halt_w;
    // Target is relative to the instruction after the branch; wraps modulo 32.
    wire [4:0] branch_target_x = pc_x + 5'd1 + {imm4_x[3], imm4_x};
    wire [4:0] pc_next         = branch_taken_x ? branch_target_x : (pc_f + 5'd1);

    // ---------------- Sequential state ----------------
    always @(posedge clk) begin
        if (rst) begin
            pc_f     <= 5'd0;
            valid_x  <= 1'b0;
            pc_x     <= 5'd0;
            instr_x  <= 8'd0;
            wr_en_w  <= 1'b0;
            rd_w     <= 2'd0;
            result_w <= 8'sd0;
            halt_w   <= 1'b0;
            rf[0]    <= 8'sd0;
            rf[1]    <= 8'sd0;
            rf[2]    <= 8'sd0;
            rf[3]    <= 8'sd0;
            pc_out   <= 5'd0;
            halt_out <= 1'b0;
            r0_out   <= 8'sd0;
        end else begin
            pc_out <= pc_f;

            // Once halt_out is high nothing below runs: the pipeline is frozen
            // and instructions younger than HALT never reach the register file.
            if (!halt_out) begin
                // W: commit
                if (wr_en_w) begin
                    rf[rd_w] <= result_w;
                    if (rd_w == 2'd0)
                        r0_out <= result_w;
                end
                halt_out <= halt_w;

                // X -> W
                wr_en_w  <= writes_rd_x;
                rd_w     <= rd_x;
                result_w <= alu_result_x;
                halt_w   <= is_halt_x;

                // F -> X
                valid_x  <= 1'b1;
                pc_x     <= pc_f;
                instr_x  <= instr_f;

                // Next fetch address
                pc_f     <= pc_next;
            end
        end
    end
endmodule
// Branch convention: a branch is resolved in the X stage. The instruction that
// is already in F when the branch resolves (the one immediately following the
// branch) always executes, giving exactly one branch delay slot. If the branch
// is taken the fetch PC is redirected on the next clock edge; otherwise the PC
// simply increments.
//
// mcu3: 3-stage (Fetch / Execute / Writeback) 8-bit micro-controller with a
// 32-byte instruction ROM supplied through the IMEM_INIT parameter.
//
// Instruction format (one byte, opcode in bits [7:6]):
//   00 rd  imm4    ADDI  R[rd] <- R[rd] + sign_extend(imm4)
//   01 rd  rs --   MOV   R[rd] <- R[rs]
//   10 cc  off4    BRcc  if cc(R0): PC <- PC_of_branch + 1 + sign_extend(off4)
//                        cc: 00 always, 01 R0 == 0, 10 R0 < 0, 11 R0 > 0
//   11 ------      HALT
//
// Ports:
//   clk      - clock; all state updates on the rising edge.
//   rst      - synchronous, active-high reset; clears PC, pipeline, registers.
//   pc_out   - registered copy of the fetch PC (one clock behind pc_f).
//   halt_out - goes high one clock after HALT occupies W and then stays high
//              until reset; the whole pipeline is frozen while it is high.
//   r0_out   - updated on the same clock edge as R[0], so it always equals R[0].
module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0 // 32 instruction bytes, byte 0 at MSB
) (
    input                   clk,
    input                   rst,      // synchronous, active-high
    output reg        [4:0] pc_out,   // current fetched PC (visible for testing)
    output reg              halt_out, // asserted forever after HALT executes
    output reg signed [7:0] r0_out    // current value of register R0
);
    localparam [1:0] OP_ADDI = 2'b00;
    localparam [1:0] OP_BR   = 2'b10;
    localparam [1:0] OP_HALT = 2'b11;

    // Instruction ROM. The range is ascending, so bits [0:7] are byte 0 (MSB).
    wire [0:255] imem = IMEM_INIT;

    // ---------------- Stage F (Fetch) ----------------
    reg  [4:0] pc_f;
    // Byte address pc_f maps to bits [pc_f*8 +: 8]; fetch is purely combinational.
    wire [7:0] instr_f = imem[{pc_f, 3'b000} +: 8];

    // ---------------- Stage X (Execute) ----------------
    // Only these three are state; every decoded field below is a wire derived
    // from instr_x, so each signal has exactly one driver.
    reg        valid_x;   // 0 = bubble (only right after reset)
    reg  [4:0] pc_x;
    reg  [7:0] instr_x;

    // ---------------- Stage W (Writeback) ----------------
    reg              wr_en_w;   // instruction in W writes R[rd_w]
    reg        [1:0] rd_w;
    reg signed [7:0] result_w;
    reg              halt_w;    // instruction in W is HALT

    // Register file
    reg signed [7:0] rf [0:3];

    // ---------------- X-stage decode ----------------
    wire [1:0] opcode_x = instr_x[7:6];
    wire [1:0] rd_x     = instr_x[5:4];
    wire [1:0] rs_x     = instr_x[3:2];
    wire [1:0] cond_x   = instr_x[5:4];
    wire [3:0] imm4_x   = instr_x[3:0];   // ADDI immediate / branch offset

    wire is_branch_x = valid_x && (opcode_x == OP_BR);
    wire is_halt_x   = valid_x && (opcode_x == OP_HALT);
    wire writes_rd_x = valid_x && !opcode_x[1];   // ADDI (00) or MOV (01)

    // ---------------- Operand read with W -> X bypass ----------------
    // The instruction in W commits to rf on the coming clock edge, so rf is
    // still stale for the instruction in X; take the value from W instead.
    wire signed [7:0] rd_val_x = (wr_en_w && (rd_w == rd_x)) ? result_w : rf[rd_x];
    wire signed [7:0] rs_val_x = (wr_en_w && (rd_w == rs_x)) ? result_w : rf[rs_x];
    wire signed [7:0] r0_val_x = (wr_en_w && (rd_w == 2'd0)) ? result_w : rf[0];

    // ---------------- ALU ----------------
    wire signed [7:0] imm_sext_x   = {{4{imm4_x[3]}}, imm4_x};
    wire signed [7:0] alu_result_x = (opcode_x == OP_ADDI) ? (rd_val_x + imm_sext_x)
                                                           : rs_val_x;

    // ---------------- Branch resolution ----------------
    wire r0_zero_x = (r0_val_x == 8'd0);
    wire r0_neg_x  = r0_val_x[7];          // sign bit of the two's-complement value

    reg cond_met_x;
    always @(*) begin
        case (cond_x)
            2'b00:   cond_met_x = 1'b1;                       // always
            2'b01:   cond_met_x = r0_zero_x;                  // R0 == 0
            2'b10:   cond_met_x = r0_neg_x;                   // R0 <  0
            default: cond_met_x = !r0_neg_x && !r0_zero_x;    // R0 >  0
        endcase
    end

    // A branch that is younger than a HALT (HALT already in W) must not
    // redirect fetch.
    wire       branch_taken_x  = is_branch_x && cond_met_x && !halt_w;
    // Target is relative to the instruction after the branch; wraps modulo 32.
    wire [4:0] branch_target_x = pc_x + 5'd1 + {imm4_x[3], imm4_x};
    wire [4:0] pc_next         = branch_taken_x ? branch_target_x : (pc_f + 5'd1);

    // ---------------- Sequential state ----------------
    always @(posedge clk) begin
        if (rst) begin
            pc_f     <= 5'd0;
            valid_x  <= 1'b0;
            pc_x     <= 5'd0;
            instr_x  <= 8'd0;
            wr_en_w  <= 1'b0;
            rd_w     <= 2'd0;
            result_w <= 8'sd0;
            halt_w   <= 1'b0;
            rf[0]    <= 8'sd0;
            rf[1]    <= 8'sd0;
            rf[2]    <= 8'sd0;
            rf[3]    <= 8'sd0;
            pc_out   <= 5'd0;
            halt_out <= 1'b0;
            r0_out   <= 8'sd0;
        end else begin
            pc_out <= pc_f;

            // Once halt_out is high nothing below runs: the pipeline is frozen
            // and instructions younger than HALT never reach the register file.
            if (!halt_out) begin
                // W: commit
                if (wr_en_w) begin
                    rf[rd_w] <= result_w;
                    if (rd_w == 2'd0)
                        r0_out <= result_w;
                end
                halt_out <= halt_w;

                // X -> W
                wr_en_w  <= writes_rd_x;
                rd_w     <= rd_x;
                result_w <= alu_result_x;
                halt_w   <= is_halt_x;

                // F -> X
                valid_x  <= 1'b1;
                pc_x     <= pc_f;
                instr_x  <= instr_f;

                // Next fetch address
                pc_f     <= pc_next;
            end
        end
    end
endmodule
```