← all candidates base

base_2

line-item PPA

passes_tests0/0
area_um2
fmax_mhz
ops_per_sec
switching_alpha
est_power_uW

per-program functional

no functional run (did not compile)

pipeline shape (heuristic register bank count)

F stage regs3
X stage regs17
W stage regs11
unstaged regs1

mistakes detected

(no automated diagnostics fired)

extracted RTL

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Register File: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];

    // Pipeline Stage Registers
    // Fetch Stage
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    reg [4:0] npc_f; // Next PC candidate (PC+1)

    // Execute Stage
    reg [4:0] pc_x;
    reg [7:0] instr_x;
    reg [4:0] npc_x; // Next PC candidate from F stage
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [3:0] imm_x;
    reg [1:0] cond_x;
    reg [3:0] off_x;
    reg signed [7:0] rs_val_x; // Read value of rs
    reg signed [7:0] alu_res_x; // ALU result
    reg is_branch_x;
    reg branch_taken_x;
    reg [4:0] branch_target_x;
    reg is_halt_x;

    // Writeback Stage
    reg [4:0] pc_w;
    reg [7:0] instr_w;
    reg [4:0] npc_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg signed [7:0] alu_res_w;
    reg is_branch_w;
    reg branch_taken_w;
    reg [4:0] branch_target_w;
    reg is_halt_w;

    // Forwarding Logic
    // We need to forward values from the Execute stage (which will be written in W) 
    // and the Writeback stage (which has just been written or is about to be committed)
    // to the Execute stage's rs read.
    // Since we have no stalls, we must forward immediately.
    // The "current" value of a register is either in the RF, or being produced by X or W.
    // Note: In a 3-stage pipeline, the result in X is available in the next cycle in W.
    // However, for the instruction in X, we need the value of rs.
    // If the previous instruction (in W) writes to rd == rs of current (in X), use W's result.
    // If the instruction before that (in X) writes to rd == rs of current (in X), use X's result.
    
    reg signed [7:0] fwd_val_x;
    reg fwd_from_w;
    reg fwd_from_x;

    // Decode logic for current instruction in X
    always @(*) begin
        opcode_x = instr_x[7:6];
        if (opcode_x == 2'b00) begin // ADDI
            rd_x = instr_x[5:4];
            imm_x = instr_x[3:0];
            rs_x = 2'b00; // Dummy
            cond_x = 2'b00; // Dummy
            off_x = 4'b0000; // Dummy
            is_branch_x = 1'b0;
            is_halt_x = 1'b0;
        end else if (opcode_x == 2'b01) begin // MOV
            rd_x = instr_x[5:4];
            rs_x = instr_x[3:2];
            imm_x = 4'b0000; // Dummy
            cond_x = 2'b00; // Dummy
            off_x = 4'b0000; // Dummy
            is_branch_x = 1'b0;
            is_halt_x = 1'b0;
        end else if (opcode_x == 2'b10) begin // BRcc
            cond_x = instr_x[5:4];
            off_x = instr_x[3:0];
            rd_x = 2'b00; // Dummy
            rs_x = 2'b00; // Dummy
            imm_x = 4'b0000; // Dummy
            is_branch_x = 1'b1;
            is_halt_x = 1'b0;
        end else begin // HALT
            rd_x = 2'b00;
            rs_x = 2'b00;
            imm_x = 4'b0000;
            cond_x = 2'b00;
            off_x = 4'b0000;
            is_branch_x = 1'b0;
            is_halt_x = 1'b1;
        end
    end

    // ALU and Branch Logic in Execute Stage
    always @(*) begin
        // Default
        alu_res_x = 8'b0;
        branch_taken_x = 1'b0;
        branch_target_x = 5'b00000;

        if (is_halt_x) begin
            alu_res_x = 8'b0;
        end else if (opcode_x == 2'b00) begin // ADDI
            // Sign extend imm[4] to 8 bits
            signed [7:0] imm_sext = {{4{imm_x[3]}}, imm_x};
            alu_res_x = rs_val_x + imm_sext;
        end else if (opcode_x == 2'b01) begin // MOV
            alu_res_x = rs_val_x;
        end else if (is_branch_x) begin
            // Compute branch target: PC + sign_ext(off, 4) + 1
            // The PC used here is the PC of the branch instruction itself (pc_x)
            signed [4:0] off_sext = {{5{off_x[3]}}, off_x};
            branch_target_x = pc_x + off_sext + 1'b1;
            
            // Evaluate condition based on R0
            // We need the current value of R0. 
            // Note: The branch condition depends on R0. 
            // If R0 was updated by the previous instruction (in W or X), we must use the forwarded value.
            // However, the forwarding logic below calculates rs_val_x. 
            // For branches, rs is dummy, but we need R0 specifically.
            // Let's handle R0 forwarding separately for branch conditions.
        end
    end

    // Determine the value of R0 for branch condition evaluation
    reg signed [7:0] r0_for_branch;
    always @(*) begin
        // Check if W stage is writing to R0
        if (opcode_w == 2'b00 && rd_w == 2'b00) begin
            r0_for_branch = alu_res_w;
        end else if (opcode_w == 2'b01 && rd_w == 2'b00) begin
            r0_for_branch = alu_res_w;
        end else begin
            // Check if X stage is writing to R0
            if (opcode_x == 2'b00 && rd_x == 2'b00) begin
                r0_for_branch = alu_res_x;
            end else if (opcode_x == 2'b01 && rd_x == 2'b00) begin
                r0_for_branch = alu_res_x;
            end else begin
                r0_for_branch = rf[0];
            end
        end
    end

    // Evaluate branch condition
    always @(*) begin
        if (is_branch_x) begin
            case (cond_x)
                2'b00: branch_taken_x = 1'b1; // Always
                2'b01: branch_taken_x = (r0_for_branch == 8'b0); // Zero
                2'b10: branch_taken_x = (r0_for_branch < 8'b0); // Negative
                2'b11: branch_taken_x = (r0_for_branch > 8'b0); // Positive
                default: branch_taken_x = 1'b0;
            endcase
        end else begin
            branch_taken_x = 1'b0;
        end
    end

    // Forwarding Logic for rs in Execute Stage
    always @(*) begin
        // Default: read from register file
        fwd_from_w = 1'b0;
        fwd_from_x = 1'b0;
        fwd_val_x = rf[rs_x];

        // Priority: W stage over X stage
        // Check if W stage is writing to the register we need (rs_x)
        // W stage writes if opcode is ADDI or MOV
        if ((opcode_w == 2'b00 || opcode_w == 2'b01) && rd_w == rs_x) begin
            fwd_from_w = 1'b1;
            fwd_val_x = alu_res_w;
        end else if ((opcode_x == 2'b00 || opcode_x == 2'b01) && rd_x == rs_x) begin
            fwd_from_x = 1'b1;
            fwd_val_x = alu_res_x;
        end else begin
            fwd_val_x = rf[rs_x];
        end
    end

    // Assign rs_val_x for use in ALU
    assign rs_val_x = fwd_val_x;

    // Pipeline Register Updates
    always @(posedge clk) begin
        if (rst) begin
            // Reset
            pc_f <= 5'b00000;
            instr_f <= 8'b0;
            npc_f <= 5'b00001;

            pc_x <= 5'b00000;
            instr_x <= 8'b0;
            npc_x <= 5'b00001;
            opcode_x <= 2'b00;
            rd_x <= 2'b00;
            rs_x <= 2'b00;
            imm_x <= 4'b0000;
            cond_x <= 2'b00;
            off_x <= 4'b0000;
            rs_val_x <= 8'b0;
            alu_res_x <= 8'b0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            branch_target_x <= 5'b00000;
            is_halt_x <= 1'b0;

            pc_w <= 5'b00000;
            instr_w <= 8'b0;
            npc_w <= 5'b00001;
            opcode_w <= 2'b00;
            rd_w <= 2'b00;
            alu_res_w <= 8'b0;
            is_branch_w <= 1'b0;
            branch_taken_w <= 1'b0;
            branch_target_w <= 5'b00000;
            is_halt_w <= 1'b0;

            // Reset Register File
            rf[0] <= 8'b0;
            rf[1] <= 8'b0;
            rf[2] <= 8'b0;
            rf[3] <= 8'b0;

            pc_out <= 5'b00000;
            halt_out <= 1'b0;
            r0_out <= 8'b0;
        end else if (halt_out) begin
            // Freeze pipeline
        end else begin
            // Update Writeback Stage
            pc_w <= pc_x;
            instr_w <= instr_x;
            npc_w <= npc_x;
            opcode_w <= opcode_x;
            rd_w <= rd_x;
            alu_res_w <= alu_res_x;
            is_branch_w <= is_branch_x;
            branch_taken_w <= branch_taken_x;
            branch_target_w <= branch_target_x;
            is_halt_w <= is_halt_x;

            // Update Execute Stage
            pc_x <= pc_f;
            instr_x <= instr_f;
            npc_x <= npc_f;
            // Decode fields are computed combinatorially from instr_x, so no need to store them explicitly in the register if we recompute.
            // However, we stored them in the always block above. Let's just update the registers that feed into the combinatorial logic.
            // Actually, the always block for decoding uses instr_x. So we just need to update instr_x.
            // The other fields (opcode_x, etc.) are derived from instr_x in the always @(*) block.
            // So we don't need to store opcode_x, rd_x, etc. in the pipeline register if we recompute them.
            // But wait, the forwarding logic and ALU logic depend on these fields.
            // Since they are derived from instr_x, and instr_x is updated, they will be updated in the next cycle.
            // So we only need to store instr_x, pc_x, npc_x, and the results of the ALU/Branch logic?
            // No, the ALU/Branch logic is combinatorial based on instr_x and rs_val_x.
            // So we just need to update instr_x, pc_x, npc_x.
            // The other signals (opcode_x, etc.) are just wires/regs that are updated combinatorially or in the next cycle?
            // Let's keep them as regs for clarity, but they are updated from the previous stage's values?
            // No, they are decoded from instr_x. So if we update instr_x, the next cycle's decode will use the new instr_x.
            // So we don't need to store opcode_x, etc. in the pipeline register.
            // But we do need to store the ALU result and branch info for the next stage (W).
            // So let's update the W stage with the current X stage's results.
            // And update the X stage with the current F stage's values.

            // Update Fetch Stage
            // Determine next PC for F stage
            if (is_halt_w) begin
                // Halt executed, freeze
                pc_f <= pc_f; // Freeze
            end else if (is_branch_w) begin
                if (branch_taken_w) begin
                    pc_f <= branch_target_w;
                end else begin
                    pc_f <= npc_w; // Sequential PC from delay slot
                end
            end else begin
                pc_f <= npc_w; // Sequential PC
            end

            // Fetch instruction
            if (!halt_out) begin
                instr_f <= IMEM_INIT[pc_f * 8 +: 8];
            end

            // Compute next PC for F stage (PC + 1)
            npc_f <= pc_f + 1'b1;

            // Update Outputs
            pc_out <= pc_f;
            
            // Update Register File and r0_out
            if (is_halt_w) begin
                // Do nothing
            end else if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                rf[rd_w] <= alu_res_w;
                if (rd_w == 2'b00) begin
                    r0_out <= alu_res_w;
                end
            end else begin
                // For branches and HALT, no register write
                // But r0_out should reflect the current R0.
                // If R0 was not written in this cycle, it keeps its value.
                // However, if a branch condition changed R0? No, branches don't write R0.
                // So r0_out only changes if rd_w == 0.
            end

            // Update Halt Output
            if (is_halt_w) begin
                halt_out <= 1'b1;
            end
        end
    end

endmodule

raw model output

```verilog
// Branch convention: The instruction immediately following a branch is always 
// executed (1 branch delay slot). The branch target PC is computed in the 
// Execute stage and committed in the Writeback stage. If the branch condition 
// is met, the PC is updated in the Writeback stage; otherwise, the PC continues 
// sequentially (PC + 1 from the delay slot instruction).

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Register File: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];

    // Pipeline Stage Registers
    // Fetch Stage
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    reg [4:0] npc_f; // Next PC candidate (PC+1)

    // Execute Stage
    reg [4:0] pc_x;
    reg [7:0] instr_x;
    reg [4:0] npc_x; // Next PC candidate from F stage
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [3:0] imm_x;
    reg [1:0] cond_x;
    reg [3:0] off_x;
    reg signed [7:0] rs_val_x; // Read value of rs
    reg signed [7:0] alu_res_x; // ALU result
    reg is_branch_x;
    reg branch_taken_x;
    reg [4:0] branch_target_x;
    reg is_halt_x;

    // Writeback Stage
    reg [4:0] pc_w;
    reg [7:0] instr_w;
    reg [4:0] npc_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg signed [7:0] alu_res_w;
    reg is_branch_w;
    reg branch_taken_w;
    reg [4:0] branch_target_w;
    reg is_halt_w;

    // Forwarding Logic
    // We need to forward values from the Execute stage (which will be written in W) 
    // and the Writeback stage (which has just been written or is about to be committed)
    // to the Execute stage's rs read.
    // Since we have no stalls, we must forward immediately.
    // The "current" value of a register is either in the RF, or being produced by X or W.
    // Note: In a 3-stage pipeline, the result in X is available in the next cycle in W.
    // However, for the instruction in X, we need the value of rs.
    // If the previous instruction (in W) writes to rd == rs of current (in X), use W's result.
    // If the instruction before that (in X) writes to rd == rs of current (in X), use X's result.
    
    reg signed [7:0] fwd_val_x;
    reg fwd_from_w;
    reg fwd_from_x;

    // Decode logic for current instruction in X
    always @(*) begin
        opcode_x = instr_x[7:6];
        if (opcode_x == 2'b00) begin // ADDI
            rd_x = instr_x[5:4];
            imm_x = instr_x[3:0];
            rs_x = 2'b00; // Dummy
            cond_x = 2'b00; // Dummy
            off_x = 4'b0000; // Dummy
            is_branch_x = 1'b0;
            is_halt_x = 1'b0;
        end else if (opcode_x == 2'b01) begin // MOV
            rd_x = instr_x[5:4];
            rs_x = instr_x[3:2];
            imm_x = 4'b0000; // Dummy
            cond_x = 2'b00; // Dummy
            off_x = 4'b0000; // Dummy
            is_branch_x = 1'b0;
            is_halt_x = 1'b0;
        end else if (opcode_x == 2'b10) begin // BRcc
            cond_x = instr_x[5:4];
            off_x = instr_x[3:0];
            rd_x = 2'b00; // Dummy
            rs_x = 2'b00; // Dummy
            imm_x = 4'b0000; // Dummy
            is_branch_x = 1'b1;
            is_halt_x = 1'b0;
        end else begin // HALT
            rd_x = 2'b00;
            rs_x = 2'b00;
            imm_x = 4'b0000;
            cond_x = 2'b00;
            off_x = 4'b0000;
            is_branch_x = 1'b0;
            is_halt_x = 1'b1;
        end
    end

    // ALU and Branch Logic in Execute Stage
    always @(*) begin
        // Default
        alu_res_x = 8'b0;
        branch_taken_x = 1'b0;
        branch_target_x = 5'b00000;

        if (is_halt_x) begin
            alu_res_x = 8'b0;
        end else if (opcode_x == 2'b00) begin // ADDI
            // Sign extend imm[4] to 8 bits
            signed [7:0] imm_sext = {{4{imm_x[3]}}, imm_x};
            alu_res_x = rs_val_x + imm_sext;
        end else if (opcode_x == 2'b01) begin // MOV
            alu_res_x = rs_val_x;
        end else if (is_branch_x) begin
            // Compute branch target: PC + sign_ext(off, 4) + 1
            // The PC used here is the PC of the branch instruction itself (pc_x)
            signed [4:0] off_sext = {{5{off_x[3]}}, off_x};
            branch_target_x = pc_x + off_sext + 1'b1;
            
            // Evaluate condition based on R0
            // We need the current value of R0. 
            // Note: The branch condition depends on R0. 
            // If R0 was updated by the previous instruction (in W or X), we must use the forwarded value.
            // However, the forwarding logic below calculates rs_val_x. 
            // For branches, rs is dummy, but we need R0 specifically.
            // Let's handle R0 forwarding separately for branch conditions.
        end
    end

    // Determine the value of R0 for branch condition evaluation
    reg signed [7:0] r0_for_branch;
    always @(*) begin
        // Check if W stage is writing to R0
        if (opcode_w == 2'b00 && rd_w == 2'b00) begin
            r0_for_branch = alu_res_w;
        end else if (opcode_w == 2'b01 && rd_w == 2'b00) begin
            r0_for_branch = alu_res_w;
        end else begin
            // Check if X stage is writing to R0
            if (opcode_x == 2'b00 && rd_x == 2'b00) begin
                r0_for_branch = alu_res_x;
            end else if (opcode_x == 2'b01 && rd_x == 2'b00) begin
                r0_for_branch = alu_res_x;
            end else begin
                r0_for_branch = rf[0];
            end
        end
    end

    // Evaluate branch condition
    always @(*) begin
        if (is_branch_x) begin
            case (cond_x)
                2'b00: branch_taken_x = 1'b1; // Always
                2'b01: branch_taken_x = (r0_for_branch == 8'b0); // Zero
                2'b10: branch_taken_x = (r0_for_branch < 8'b0); // Negative
                2'b11: branch_taken_x = (r0_for_branch > 8'b0); // Positive
                default: branch_taken_x = 1'b0;
            endcase
        end else begin
            branch_taken_x = 1'b0;
        end
    end

    // Forwarding Logic for rs in Execute Stage
    always @(*) begin
        // Default: read from register file
        fwd_from_w = 1'b0;
        fwd_from_x = 1'b0;
        fwd_val_x = rf[rs_x];

        // Priority: W stage over X stage
        // Check if W stage is writing to the register we need (rs_x)
        // W stage writes if opcode is ADDI or MOV
        if ((opcode_w == 2'b00 || opcode_w == 2'b01) && rd_w == rs_x) begin
            fwd_from_w = 1'b1;
            fwd_val_x = alu_res_w;
        end else if ((opcode_x == 2'b00 || opcode_x == 2'b01) && rd_x == rs_x) begin
            fwd_from_x = 1'b1;
            fwd_val_x = alu_res_x;
        end else begin
            fwd_val_x = rf[rs_x];
        end
    end

    // Assign rs_val_x for use in ALU
    assign rs_val_x = fwd_val_x;

    // Pipeline Register Updates
    always @(posedge clk) begin
        if (rst) begin
            // Reset
            pc_f <= 5'b00000;
            instr_f <= 8'b0;
            npc_f <= 5'b00001;

            pc_x <= 5'b00000;
            instr_x <= 8'b0;
            npc_x <= 5'b00001;
            opcode_x <= 2'b00;
            rd_x <= 2'b00;
            rs_x <= 2'b00;
            imm_x <= 4'b0000;
            cond_x <= 2'b00;
            off_x <= 4'b0000;
            rs_val_x <= 8'b0;
            alu_res_x <= 8'b0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            branch_target_x <= 5'b00000;
            is_halt_x <= 1'b0;

            pc_w <= 5'b00000;
            instr_w <= 8'b0;
            npc_w <= 5'b00001;
            opcode_w <= 2'b00;
            rd_w <= 2'b00;
            alu_res_w <= 8'b0;
            is_branch_w <= 1'b0;
            branch_taken_w <= 1'b0;
            branch_target_w <= 5'b00000;
            is_halt_w <= 1'b0;

            // Reset Register File
            rf[0] <= 8'b0;
            rf[1] <= 8'b0;
            rf[2] <= 8'b0;
            rf[3] <= 8'b0;

            pc_out <= 5'b00000;
            halt_out <= 1'b0;
            r0_out <= 8'b0;
        end else if (halt_out) begin
            // Freeze pipeline
        end else begin
            // Update Writeback Stage
            pc_w <= pc_x;
            instr_w <= instr_x;
            npc_w <= npc_x;
            opcode_w <= opcode_x;
            rd_w <= rd_x;
            alu_res_w <= alu_res_x;
            is_branch_w <= is_branch_x;
            branch_taken_w <= branch_taken_x;
            branch_target_w <= branch_target_x;
            is_halt_w <= is_halt_x;

            // Update Execute Stage
            pc_x <= pc_f;
            instr_x <= instr_f;
            npc_x <= npc_f;
            // Decode fields are computed combinatorially from instr_x, so no need to store them explicitly in the register if we recompute.
            // However, we stored them in the always block above. Let's just update the registers that feed into the combinatorial logic.
            // Actually, the always block for decoding uses instr_x. So we just need to update instr_x.
            // The other fields (opcode_x, etc.) are derived from instr_x in the always @(*) block.
            // So we don't need to store opcode_x, rd_x, etc. in the pipeline register if we recompute them.
            // But wait, the forwarding logic and ALU logic depend on these fields.
            // Since they are derived from instr_x, and instr_x is updated, they will be updated in the next cycle.
            // So we only need to store instr_x, pc_x, npc_x, and the results of the ALU/Branch logic?
            // No, the ALU/Branch logic is combinatorial based on instr_x and rs_val_x.
            // So we just need to update instr_x, pc_x, npc_x.
            // The other signals (opcode_x, etc.) are just wires/regs that are updated combinatorially or in the next cycle?
            // Let's keep them as regs for clarity, but they are updated from the previous stage's values?
            // No, they are decoded from instr_x. So if we update instr_x, the next cycle's decode will use the new instr_x.
            // So we don't need to store opcode_x, etc. in the pipeline register.
            // But we do need to store the ALU result and branch info for the next stage (W).
            // So let's update the W stage with the current X stage's results.
            // And update the X stage with the current F stage's values.

            // Update Fetch Stage
            // Determine next PC for F stage
            if (is_halt_w) begin
                // Halt executed, freeze
                pc_f <= pc_f; // Freeze
            end else if (is_branch_w) begin
                if (branch_taken_w) begin
                    pc_f <= branch_target_w;
                end else begin
                    pc_f <= npc_w; // Sequential PC from delay slot
                end
            end else begin
                pc_f <= npc_w; // Sequential PC
            end

            // Fetch instruction
            if (!halt_out) begin
                instr_f <= IMEM_INIT[pc_f * 8 +: 8];
            end

            // Compute next PC for F stage (PC + 1)
            npc_f <= pc_f + 1'b1;

            // Update Outputs
            pc_out <= pc_f;
            
            // Update Register File and r0_out
            if (is_halt_w) begin
                // Do nothing
            end else if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                rf[rd_w] <= alu_res_w;
                if (rd_w == 2'b00) begin
                    r0_out <= alu_res_w;
                end
            end else begin
                // For branches and HALT, no register write
                // But r0_out should reflect the current R0.
                // If R0 was not written in this cycle, it keeps its value.
                // However, if a branch condition changed R0? No, branches don't write R0.
                // So r0_out only changes if rd_w == 0.
            end

            // Update Halt Output
            if (is_halt_w) begin
                halt_out <= 1'b1;
            end
        end
    end

endmodule
```