← all candidates base

base_1

line-item PPA

passes_tests0/0
area_um2
fmax_mhz
ops_per_sec
switching_alpha
est_power_uW

per-program functional

no functional run (did not compile)

pipeline shape (heuristic register bank count)

F stage regs3
X stage regs16
W stage regs10
unstaged regs2

mistakes detected

extracted RTL

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Register File: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];
    
    // Pipeline Stage Registers
    // Fetch Stage
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    reg [4:0] npc_f; // Next PC candidate (sequential)
    
    // Execute Stage
    reg [7:0] instr_x;
    reg [4:0] pc_x;
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [3:0] imm_x;
    reg [1:0] cond_x;
    reg [3:0] off_x;
    reg signed [7:0] rs_val_x;
    reg signed [7:0] result_x;
    reg is_branch_x;
    reg branch_taken_x;
    reg [4:0] branch_target_x;
    reg is_halt_x;
    
    // Writeback Stage
    reg [7:0] instr_w;
    reg [4:0] pc_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg signed [7:0] result_w;
    reg is_branch_w;
    reg branch_taken_w;
    reg [4:0] branch_target_w;
    reg is_halt_w;
    reg [4:0] npc_w; // Next PC from previous stage
    
    // Forwarding signals
    reg [1:0] fwd_rs_x; // 0: RF, 1: EX result, 2: WB result
    reg [1:0] fwd_rd_x; // Not used for rd, but for completeness
    
    // Internal halt flag
    reg halt_int;

    // Initialize registers
    integer i;
    initial begin
        for (i = 0; i < 4; i = i + 1) begin
            rf[i] = 8's0;
        end
    end

    // Helper function to sign extend 4-bit immediate to 8-bit
    function signed [7:0] sign_ext4;
        input [3:0] val;
        begin
            sign_ext4 = {{4{val[3]}}, val};
        end
    endfunction

    // Helper function to sign extend 4-bit offset to 5-bit (for PC addition)
    function signed [4:0] sign_ext4_to_5;
        input [3:0] val;
        begin
            sign_ext4_to_5 = {{5{val[3]}}, val};
        end
    endfunction

    // Combinational logic for forwarding
    // We need to forward to rs in the Execute stage
    // Sources: WB stage result, EX stage result
    // Destination: rs in EX stage
    
    always @(*) begin
        // Default: read from register file
        fwd_rs_x = 2'b00;
        
        // Check if rs matches rd in WB stage and WB is valid
        if (rd_w != 2'b00 && rs_x == rd_w && !is_halt_w) begin
            fwd_rs_x = 2'b10; // Forward from WB
        end
        // Check if rs matches rd in EX stage and EX is valid (and not WB)
        else if (rd_x != 2'b00 && rs_x == rd_x && !is_halt_x) begin
            fwd_rs_x = 2'b01; // Forward from EX
        end
    end

    // Select rs value based on forwarding
    wire signed [7:0] rs_val_sel;
    assign rs_val_sel = fwd_rs_x == 2'b00 ? rf[rs_x] : 
                        fwd_rs_x == 2'b01 ? result_x : 
                        result_w;

    // Pipeline Stage Logic
    always @(posedge clk) begin
        if (rst) begin
            // Reset
            pc_f <= 5'b00000;
            instr_f <= 8'b0;
            npc_f <= 5'b00001;
            
            instr_x <= 8'b0;
            pc_x <= 5'b00000;
            opcode_x <= 2'b00;
            rd_x <= 2'b00;
            rs_x <= 2'b00;
            imm_x <= 4'b0;
            cond_x <= 2'b00;
            off_x <= 4'b0;
            rs_val_x <= 8's0;
            result_x <= 8's0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            branch_target_x <= 5'b0;
            is_halt_x <= 1'b0;
            
            instr_w <= 8'b0;
            pc_w <= 5'b00000;
            opcode_w <= 2'b00;
            rd_w <= 2'b00;
            result_w <= 8's0;
            is_branch_w <= 1'b0;
            branch_taken_w <= 1'b0;
            branch_target_w <= 5'b0;
            is_halt_w <= 1'b0;
            npc_w <= 5'b00001;
            
            halt_int <= 1'b0;
            
            // Reset register file
            for (i = 0; i < 4; i = i + 1) begin
                rf[i] <= 8's0;
            end
        end else if (halt_int) begin
            // Halt: freeze pipeline
            // Do nothing, keep current state
        end else begin
            // Normal operation
            
            // Writeback Stage: Update register file or halt
            if (is_halt_w) begin
                halt_int <= 1'b1;
            end else if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                // ADDI or MOV: write result to rd
                if (rd_w != 2'b00) begin
                    rf[rd_w] <= result_w;
                end
            end
            // BRcc does not write to register file
            
            // Update WB stage from EX stage
            instr_w <= instr_x;
            pc_w <= pc_x;
            opcode_w <= opcode_x;
            rd_w <= rd_x;
            result_w <= result_x;
            is_branch_w <= is_branch_x;
            branch_taken_w <= branch_taken_x;
            branch_target_w <= branch_target_x;
            is_halt_w <= is_halt_x;
            npc_w <= npc_f; // The next PC computed in fetch
            
            // Update EX stage from F stage
            instr_x <= instr_f;
            pc_x <= pc_f;
            
            // Decode instruction in EX stage
            opcode_x <= instr_f[7:6];
            rd_x <= instr_f[5:4];
            
            if (instr_f[7:6] == 2'b00) begin
                // ADDI: 00 rd[2] imm[4]
                rs_x <= 2'b00; // Not used
                imm_x <= instr_f[3:0];
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b0;
            end else if (instr_f[7:6] == 2'b01) begin
                // MOV: 01 rd[2] rs[2] _[2]
                rs_x <= instr_f[3:2];
                imm_x <= 4'b0;
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b0;
            end else if (instr_f[7:6] == 2'b10) begin
                // BRcc: 10 cond[2] off[4]
                cond_x <= instr_f[5:4];
                off_x <= instr_f[3:0];
                rd_x <= 2'b00;
                rs_x <= 2'b00;
                imm_x <= 4'b0;
                is_branch_x <= 1'b1;
                is_halt_x <= 1'b0;
            end else begin
                // HALT: 11 _ _ _ _ _ _
                rd_x <= 2'b00;
                rs_x <= 2'b00;
                imm_x <= 4'b0;
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b1;
            end
            
            // Execute stage computation
            if (is_halt_x) begin
                result_x <= 8's0;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (opcode_x == 2'b00) begin
                // ADDI: R[rd] <= R[rd] + sign_ext(imm, 4)
                // rd is the source and destination
                signed [7:0] rd_val;
                rd_val = fwd_rs_x == 2'b00 ? rf[rd_x] : 
                         fwd_rs_x == 2'b01 ? result_x : 
                         result_w;
                result_x = rd_val + sign_ext4(imm_x);
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (opcode_x == 2'b01) begin
                // MOV: R[rd] <= R[rs]
                result_x = rs_val_sel;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (is_branch_x) begin
                // BRcc: Evaluate condition
                result_x <= 8's0; // Not used
                branch_target_x <= pc_x + sign_ext4_to_5(off_x) + 5'b1;
                
                case (cond_x)
                    2'b00: branch_taken_x <= 1'b1; // Always
                    2'b01: branch_taken_x <= (rf[0] == 8's0) ? 1'b1 : 1'b0; // Zero R0
                    2'b10: branch_taken_x <= (rf[0] < 8's0) ? 1'b1 : 1'b0;   // Neg R0
                    2'b11: branch_taken_x <= (rf[0] > 8's0) ? 1'b1 : 1'b0;   // Pos R0
                    default: branch_taken_x <= 1'b0;
                endcase
            end else begin
                result_x <= 8's0;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end
            
            // Update F stage
            if (halt_int) begin
                // Already halted, do nothing
            end else if (is_branch_w && branch_taken_w) begin
                // Branch taken: next PC is branch target
                // But we must account for the delay slot
                // The instruction after the branch (at npc_w) is executed
                // The next fetch should be the branch target
                pc_f <= branch_target_w;
                // Fetch the instruction at the branch target
                // Note: IMEM_INIT is indexed with byte 0 at MSB
                // IMEM_INIT[PC*8 +: 8] means bits [PC*8+7 : PC*8]
                instr_f <= IMEM_INIT[branch_target_w*8 +: 8];
                npc_f <= branch_target_w + 5'b1;
            end else begin
                // No branch taken or not a branch: sequential
                pc_f <= npc_w;
                instr_f <= IMEM_INIT[npc_w*8 +: 8];
                npc_f <= npc_w + 5'b1;
            end
        end
    end

    // Output assignments
    always @(posedge clk) begin
        if (rst) begin
            pc_out <= 5'b00000;
            halt_out <= 1'b0;
            r0_out <= 8's0;
        end else if (halt_int) begin
            halt_out <= 1'b1;
            // pc_out and r0_out remain as they were when halt occurred
        end else begin
            pc_out <= pc_f;
            halt_out <= 1'b0;
            // r0_out reflects the most recently written value of R[0]
            // We need to track the last written value to R[0]
            // This is tricky because we need to know what was written in WB
            // Let's use a separate register for r0_out
        end
    end

    // Track R0 for output
    reg signed [7:0] r0_last;
    always @(posedge clk) begin
        if (rst) begin
            r0_last <= 8's0;
        end else if (halt_int) begin
            // Keep last value
        end else begin
            // Check if R[0] was written in this cycle
            if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                if (rd_w == 2'b00) begin
                    r0_last <= result_w;
                end else begin
                    // Keep previous value
                end
            end else begin
                // Keep previous value
            end
        end
    end

    // Assign r0_out
    assign r0_out = r0_last;

endmodule

raw model output

```verilog
// Branch convention: The instruction immediately following a BRcc is always 
// executed (branch delay slot). If the branch is taken, the instruction in 
// the delay slot is fetched and executed, but its architectural effects 
// (register writes) are discarded if the branch target is different from 
// the sequential PC. If the branch is not taken, the delay slot instruction 
// executes normally.

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Register File: 4 registers, 8-bit signed
    reg signed [7:0] rf [0:3];
    
    // Pipeline Stage Registers
    // Fetch Stage
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    reg [4:0] npc_f; // Next PC candidate (sequential)
    
    // Execute Stage
    reg [7:0] instr_x;
    reg [4:0] pc_x;
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [3:0] imm_x;
    reg [1:0] cond_x;
    reg [3:0] off_x;
    reg signed [7:0] rs_val_x;
    reg signed [7:0] result_x;
    reg is_branch_x;
    reg branch_taken_x;
    reg [4:0] branch_target_x;
    reg is_halt_x;
    
    // Writeback Stage
    reg [7:0] instr_w;
    reg [4:0] pc_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg signed [7:0] result_w;
    reg is_branch_w;
    reg branch_taken_w;
    reg [4:0] branch_target_w;
    reg is_halt_w;
    reg [4:0] npc_w; // Next PC from previous stage
    
    // Forwarding signals
    reg [1:0] fwd_rs_x; // 0: RF, 1: EX result, 2: WB result
    reg [1:0] fwd_rd_x; // Not used for rd, but for completeness
    
    // Internal halt flag
    reg halt_int;

    // Initialize registers
    integer i;
    initial begin
        for (i = 0; i < 4; i = i + 1) begin
            rf[i] = 8's0;
        end
    end

    // Helper function to sign extend 4-bit immediate to 8-bit
    function signed [7:0] sign_ext4;
        input [3:0] val;
        begin
            sign_ext4 = {{4{val[3]}}, val};
        end
    endfunction

    // Helper function to sign extend 4-bit offset to 5-bit (for PC addition)
    function signed [4:0] sign_ext4_to_5;
        input [3:0] val;
        begin
            sign_ext4_to_5 = {{5{val[3]}}, val};
        end
    endfunction

    // Combinational logic for forwarding
    // We need to forward to rs in the Execute stage
    // Sources: WB stage result, EX stage result
    // Destination: rs in EX stage
    
    always @(*) begin
        // Default: read from register file
        fwd_rs_x = 2'b00;
        
        // Check if rs matches rd in WB stage and WB is valid
        if (rd_w != 2'b00 && rs_x == rd_w && !is_halt_w) begin
            fwd_rs_x = 2'b10; // Forward from WB
        end
        // Check if rs matches rd in EX stage and EX is valid (and not WB)
        else if (rd_x != 2'b00 && rs_x == rd_x && !is_halt_x) begin
            fwd_rs_x = 2'b01; // Forward from EX
        end
    end

    // Select rs value based on forwarding
    wire signed [7:0] rs_val_sel;
    assign rs_val_sel = fwd_rs_x == 2'b00 ? rf[rs_x] : 
                        fwd_rs_x == 2'b01 ? result_x : 
                        result_w;

    // Pipeline Stage Logic
    always @(posedge clk) begin
        if (rst) begin
            // Reset
            pc_f <= 5'b00000;
            instr_f <= 8'b0;
            npc_f <= 5'b00001;
            
            instr_x <= 8'b0;
            pc_x <= 5'b00000;
            opcode_x <= 2'b00;
            rd_x <= 2'b00;
            rs_x <= 2'b00;
            imm_x <= 4'b0;
            cond_x <= 2'b00;
            off_x <= 4'b0;
            rs_val_x <= 8's0;
            result_x <= 8's0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            branch_target_x <= 5'b0;
            is_halt_x <= 1'b0;
            
            instr_w <= 8'b0;
            pc_w <= 5'b00000;
            opcode_w <= 2'b00;
            rd_w <= 2'b00;
            result_w <= 8's0;
            is_branch_w <= 1'b0;
            branch_taken_w <= 1'b0;
            branch_target_w <= 5'b0;
            is_halt_w <= 1'b0;
            npc_w <= 5'b00001;
            
            halt_int <= 1'b0;
            
            // Reset register file
            for (i = 0; i < 4; i = i + 1) begin
                rf[i] <= 8's0;
            end
        end else if (halt_int) begin
            // Halt: freeze pipeline
            // Do nothing, keep current state
        end else begin
            // Normal operation
            
            // Writeback Stage: Update register file or halt
            if (is_halt_w) begin
                halt_int <= 1'b1;
            end else if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                // ADDI or MOV: write result to rd
                if (rd_w != 2'b00) begin
                    rf[rd_w] <= result_w;
                end
            end
            // BRcc does not write to register file
            
            // Update WB stage from EX stage
            instr_w <= instr_x;
            pc_w <= pc_x;
            opcode_w <= opcode_x;
            rd_w <= rd_x;
            result_w <= result_x;
            is_branch_w <= is_branch_x;
            branch_taken_w <= branch_taken_x;
            branch_target_w <= branch_target_x;
            is_halt_w <= is_halt_x;
            npc_w <= npc_f; // The next PC computed in fetch
            
            // Update EX stage from F stage
            instr_x <= instr_f;
            pc_x <= pc_f;
            
            // Decode instruction in EX stage
            opcode_x <= instr_f[7:6];
            rd_x <= instr_f[5:4];
            
            if (instr_f[7:6] == 2'b00) begin
                // ADDI: 00 rd[2] imm[4]
                rs_x <= 2'b00; // Not used
                imm_x <= instr_f[3:0];
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b0;
            end else if (instr_f[7:6] == 2'b01) begin
                // MOV: 01 rd[2] rs[2] _[2]
                rs_x <= instr_f[3:2];
                imm_x <= 4'b0;
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b0;
            end else if (instr_f[7:6] == 2'b10) begin
                // BRcc: 10 cond[2] off[4]
                cond_x <= instr_f[5:4];
                off_x <= instr_f[3:0];
                rd_x <= 2'b00;
                rs_x <= 2'b00;
                imm_x <= 4'b0;
                is_branch_x <= 1'b1;
                is_halt_x <= 1'b0;
            end else begin
                // HALT: 11 _ _ _ _ _ _
                rd_x <= 2'b00;
                rs_x <= 2'b00;
                imm_x <= 4'b0;
                cond_x <= 2'b00;
                off_x <= 4'b0;
                is_branch_x <= 1'b0;
                is_halt_x <= 1'b1;
            end
            
            // Execute stage computation
            if (is_halt_x) begin
                result_x <= 8's0;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (opcode_x == 2'b00) begin
                // ADDI: R[rd] <= R[rd] + sign_ext(imm, 4)
                // rd is the source and destination
                signed [7:0] rd_val;
                rd_val = fwd_rs_x == 2'b00 ? rf[rd_x] : 
                         fwd_rs_x == 2'b01 ? result_x : 
                         result_w;
                result_x = rd_val + sign_ext4(imm_x);
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (opcode_x == 2'b01) begin
                // MOV: R[rd] <= R[rs]
                result_x = rs_val_sel;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end else if (is_branch_x) begin
                // BRcc: Evaluate condition
                result_x <= 8's0; // Not used
                branch_target_x <= pc_x + sign_ext4_to_5(off_x) + 5'b1;
                
                case (cond_x)
                    2'b00: branch_taken_x <= 1'b1; // Always
                    2'b01: branch_taken_x <= (rf[0] == 8's0) ? 1'b1 : 1'b0; // Zero R0
                    2'b10: branch_taken_x <= (rf[0] < 8's0) ? 1'b1 : 1'b0;   // Neg R0
                    2'b11: branch_taken_x <= (rf[0] > 8's0) ? 1'b1 : 1'b0;   // Pos R0
                    default: branch_taken_x <= 1'b0;
                endcase
            end else begin
                result_x <= 8's0;
                branch_taken_x <= 1'b0;
                branch_target_x <= 5'b0;
            end
            
            // Update F stage
            if (halt_int) begin
                // Already halted, do nothing
            end else if (is_branch_w && branch_taken_w) begin
                // Branch taken: next PC is branch target
                // But we must account for the delay slot
                // The instruction after the branch (at npc_w) is executed
                // The next fetch should be the branch target
                pc_f <= branch_target_w;
                // Fetch the instruction at the branch target
                // Note: IMEM_INIT is indexed with byte 0 at MSB
                // IMEM_INIT[PC*8 +: 8] means bits [PC*8+7 : PC*8]
                instr_f <= IMEM_INIT[branch_target_w*8 +: 8];
                npc_f <= branch_target_w + 5'b1;
            end else begin
                // No branch taken or not a branch: sequential
                pc_f <= npc_w;
                instr_f <= IMEM_INIT[npc_w*8 +: 8];
                npc_f <= npc_w + 5'b1;
            end
        end
    end

    // Output assignments
    always @(posedge clk) begin
        if (rst) begin
            pc_out <= 5'b00000;
            halt_out <= 1'b0;
            r0_out <= 8's0;
        end else if (halt_int) begin
            halt_out <= 1'b1;
            // pc_out and r0_out remain as they were when halt occurred
        end else begin
            pc_out <= pc_f;
            halt_out <= 1'b0;
            // r0_out reflects the most recently written value of R[0]
            // We need to track the last written value to R[0]
            // This is tricky because we need to know what was written in WB
            // Let's use a separate register for r0_out
        end
    end

    // Track R0 for output
    reg signed [7:0] r0_last;
    always @(posedge clk) begin
        if (rst) begin
            r0_last <= 8's0;
        end else if (halt_int) begin
            // Keep last value
        end else begin
            // Check if R[0] was written in this cycle
            if (opcode_w == 2'b00 || opcode_w == 2'b01) begin
                if (rd_w == 2'b00) begin
                    r0_last <= result_w;
                end else begin
                    // Keep previous value
                end
            end else begin
                // Keep previous value
            end
        end
    end

    // Assign r0_out
    assign r0_out = r0_last;

endmodule
```