← all candidates base

base_6

line-item PPA

passes_tests0/0
area_um2
fmax_mhz
ops_per_sec
switching_alpha
est_power_uW

per-program functional

no functional run (did not compile)

pipeline shape (heuristic register bank count)

F stage regs2
X stage regs15
W stage regs7
unstaged regs5

mistakes detected

(no automated diagnostics fired)

extracted RTL

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Pipeline registers
    // Stage F (Fetch)
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    
    // Stage X (Execute)
    reg [4:0] pc_x;
    reg [7:0] instr_x;
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [1:0] cond_x;
    reg [3:0] imm_x;
    reg [3:0] off_x;
    reg [7:0] alu_result_x;
    reg       is_branch_x;
    reg       branch_taken_x;
    reg       is_halt_x;
    reg       rd_valid_x;
    
    // Stage W (Writeback)
    reg [4:0] pc_w;
    reg [7:0] instr_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg [7:0] alu_result_w;
    reg       is_halt_w;
    reg       rd_valid_w;
    
    // Register File
    reg signed [7:0] rf [0:3];
    
    // Forwarding logic
    reg signed [7:0] fwd_val_x;
    reg       fwd_valid_x;
    
    // Next PC logic
    reg [4:0] pc_next;
    
    // Decode F stage
    always @(*) begin
        // Fetch instruction from IMEM
        // IMEM_INIT is 256 bits, 32 bytes. Byte 0 is at MSB.
        // Byte address PC maps to bits [PC*8 +: 8]
        instr_f = IMEM_INIT[pc_f*8 +: 8];
    end
    
    // Decode X stage
    always @(*) begin
        opcode_x = instr_x[7:6];
        case (opcode_x)
            2'b00: begin // ADDI
                rd_x = instr_x[5:4];
                imm_x = instr_x[3:0];
                is_branch_x = 1'b0;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b1;
            end
            2'b01: begin // MOV
                rd_x = instr_x[5:4];
                rs_x = instr_x[3:2];
                is_branch_x = 1'b0;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b1;
            end
            2'b10: begin // BRcc
                cond_x = instr_x[5:4];
                off_x = instr_x[3:0];
                is_branch_x = 1'b1;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b0;
            end
            default: begin // HALT
                is_branch_x = 1'b0;
                is_halt_x = 1'b1;
                rd_valid_x = 1'b0;
            end
        endcase
    end
    
    // Forwarding logic for X stage
    // Forward from W stage if W is writing to the register X is reading
    always @(*) begin
        if (opcode_x == 2'b01 && rd_valid_w && (rs_x == rd_w)) begin
            fwd_val_x = alu_result_w;
            fwd_valid_x = 1'b1;
        end else if (opcode_x == 2'b00 && rd_valid_w && (rd_x == rd_w)) begin
            // For ADDI, we might need to forward if rd is also rs? No, ADDI doesn't read rs.
            // But if ADDI writes to rd, and next ADDI reads rd? No, ADDI doesn't read rd.
            // Actually, ADDI reads nothing from RF. MOV reads rs.
            // So forwarding is only needed for MOV reading rs.
            fwd_val_x = alu_result_w;
            fwd_valid_x = 1'b1;
        end else begin
            fwd_valid_x = 1'b0;
            fwd_val_x = 8'b0;
        end
    end
    
    // Execute stage ALU and branch logic
    always @(*) begin
        // Default values
        alu_result_x = 8'b0;
        branch_taken_x = 1'b0;
        
        if (opcode_x == 2'b00) begin // ADDI
            // Read rd from RF, with forwarding
            reg signed [7:0] rd_val;
            if (fwd_valid_x && (rd_x == rd_w)) begin
                rd_val = fwd_val_x;
            end else begin
                rd_val = rf[rd_x];
            end
            // Sign extend imm
            reg signed [7:0] imm_sext;
            imm_sext = {{4{imm_x[3]}}, imm_x};
            alu_result_x = rd_val + imm_sext;
        end else if (opcode_x == 2'b01) begin // MOV
            // Read rs from RF, with forwarding
            reg signed [7:0] rs_val;
            if (fwd_valid_x && (rs_x == rd_w)) begin
                rs_val = fwd_val_x;
            end else begin
                rs_val = rf[rs_x];
            end
            alu_result_x = rs_val;
        end else if (opcode_x == 2'b10) begin // BRcc
            // Determine if branch is taken
            case (cond_x)
                2'b00: branch_taken_x = 1'b1; // Always
                2'b01: branch_taken_x = (r0_out == 8'sb0); // Zero
                2'b10: branch_taken_x = (r0_out < 8'sb0);  // Negative
                2'b11: branch_taken_x = (r0_out > 8'sb0);  // Positive
                default: branch_taken_x = 1'b0;
            endcase
            // Calculate branch target
            reg signed [7:0] off_sext;
            off_sext = {{4{off_x[3]}}, off_x};
            // PC for branch is pc_x + 1 + off
            // But we need to output the target for W stage
            // We'll compute the target in W stage or here
            // Let's store the target in alu_result_x for simplicity
            alu_result_x = pc_x + 1 + off_sext;
        end else if (opcode_x == 2'b11) begin // HALT
            // Nothing to compute
        end
    end
    
    // Pipeline registers update
    always @(posedge clk) begin
        if (rst) begin
            // Reset all pipeline stages
            pc_f <= 5'b0;
            instr_f <= 8'b0;
            
            pc_x <= 5'b0;
            instr_x <= 8'b0;
            opcode_x <= 2'b0;
            rd_x <= 2'b0;
            rs_x <= 2'b0;
            cond_x <= 2'b0;
            imm_x <= 4'b0;
            off_x <= 4'b0;
            alu_result_x <= 8'b0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            is_halt_x <= 1'b0;
            rd_valid_x <= 1'b0;
            
            pc_w <= 5'b0;
            instr_w <= 8'b0;
            opcode_w <= 2'b0;
            rd_w <= 2'b0;
            alu_result_w <= 8'b0;
            is_halt_w <= 1'b0;
            rd_valid_w <= 1'b0;
            
            // Reset register file
            rf[0] <= 8'sb0;
            rf[1] <= 8'sb0;
            rf[2] <= 8'sb0;
            rf[3] <= 8'sb0;
            
            // Reset outputs
            pc_out <= 5'b0;
            halt_out <= 1'b0;
            r0_out <= 8'sb0;
        end else begin
            // Update F stage
            if (!halt_out) begin
                pc_f <= pc_next;
                instr_f <= IMEM_INIT[pc_next*8 +: 8];
            end
            
            // Update X stage
            if (!halt_out) begin
                pc_x <= pc_f;
                instr_x <= instr_f;
                opcode_x <= instr_f[7:6];
                case (instr_f[7:6])
                    2'b00: begin
                        rd_x <= instr_f[5:4];
                        imm_x <= instr_f[3:0];
                    end
                    2'b01: begin
                        rd_x <= instr_f[5:4];
                        rs_x <= instr_f[3:2];
                    end
                    2'b10: begin
                        cond_x <= instr_f[5:4];
                        off_x <= instr_f[3:0];
                    end
                    default: begin
                        // HALT
                    end
                endcase
                is_branch_x <= (instr_f[7:6] == 2'b10);
                is_halt_x <= (instr_f[7:6] == 2'b11);
                rd_valid_x <= (instr_f[7:6] != 2'b10) && (instr_f[7:6] != 2'b11);
            end
            
            // Update W stage
            if (!halt_out) begin
                pc_w <= pc_x;
                instr_w <= instr_x;
                opcode_w <= opcode_x;
                rd_w <= rd_x;
                alu_result_w <= alu_result_x;
                is_halt_w <= is_halt_x;
                rd_valid_w <= rd_valid_x;
            end
            
            // Update register file
            if (rd_valid_w && !is_halt_w && !halt_out) begin
                rf[rd_w] <= alu_result_w;
            end
            
            // Update outputs
            pc_out <= pc_f;
            if (is_halt_w || halt_out) begin
                halt_out <= 1'b1;
            end else begin
                halt_out <= 1'b0;
            end
            
            // Update r0_out
            if (rd_valid_w && (rd_w == 2'b0) && !is_halt_w && !halt_out) begin
                r0_out <= alu_result_w;
            end else if (halt_out) begin
                // Keep r0_out as is
            end else begin
                // If no write to R0, keep previous value? 
                // The spec says "r0_out always reflects the most recently written value of R[0]"
                // So we need to keep it if not written
                // But we also need to handle the case where R0 is updated in RF
                // Let's just mirror rf[0] if R0 was written, otherwise keep r0_out
                // Actually, let's just mirror rf[0] always for simplicity, since rf[0] is the source of truth
                r0_out <= rf[0];
            end
            
            // Compute next PC
            if (is_branch_x && branch_taken_x) begin
                // Branch taken, target is in alu_result_x
                // But we need to handle the delay slot
                // The instruction after the branch (delay slot) executes
                // Then the PC jumps to the target
                // So pc_next should be the branch target
                pc_next = alu_result_x;
            end else begin
                // No branch or branch not taken
                pc_next = pc_f + 1;
            end
        end
    end

endmodule

raw model output

```verilog
// Branch convention: The instruction immediately following a branch (the delay slot)
// always executes. The branch target PC is calculated in the X stage and applied
// in the W stage if the condition is met. If the branch is not taken, PC increments.
// This results in a 1-cycle branch delay slot.

module mcu3 #(
    parameter [0:255] IMEM_INIT = 256'h0  // 32 instruction bytes, byte 0 at MSB
) (
    input               clk,
    input               rst,             // synchronous, active-high
    output reg [4:0]    pc_out,          // current fetched PC (visible for testing)
    output reg          halt_out,        // asserted forever after HALT executes
    output reg signed [7:0] r0_out       // current value of register R0
);

    // Pipeline registers
    // Stage F (Fetch)
    reg [4:0] pc_f;
    reg [7:0] instr_f;
    
    // Stage X (Execute)
    reg [4:0] pc_x;
    reg [7:0] instr_x;
    reg [1:0] opcode_x;
    reg [1:0] rd_x;
    reg [1:0] rs_x;
    reg [1:0] cond_x;
    reg [3:0] imm_x;
    reg [3:0] off_x;
    reg [7:0] alu_result_x;
    reg       is_branch_x;
    reg       branch_taken_x;
    reg       is_halt_x;
    reg       rd_valid_x;
    
    // Stage W (Writeback)
    reg [4:0] pc_w;
    reg [7:0] instr_w;
    reg [1:0] opcode_w;
    reg [1:0] rd_w;
    reg [7:0] alu_result_w;
    reg       is_halt_w;
    reg       rd_valid_w;
    
    // Register File
    reg signed [7:0] rf [0:3];
    
    // Forwarding logic
    reg signed [7:0] fwd_val_x;
    reg       fwd_valid_x;
    
    // Next PC logic
    reg [4:0] pc_next;
    
    // Decode F stage
    always @(*) begin
        // Fetch instruction from IMEM
        // IMEM_INIT is 256 bits, 32 bytes. Byte 0 is at MSB.
        // Byte address PC maps to bits [PC*8 +: 8]
        instr_f = IMEM_INIT[pc_f*8 +: 8];
    end
    
    // Decode X stage
    always @(*) begin
        opcode_x = instr_x[7:6];
        case (opcode_x)
            2'b00: begin // ADDI
                rd_x = instr_x[5:4];
                imm_x = instr_x[3:0];
                is_branch_x = 1'b0;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b1;
            end
            2'b01: begin // MOV
                rd_x = instr_x[5:4];
                rs_x = instr_x[3:2];
                is_branch_x = 1'b0;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b1;
            end
            2'b10: begin // BRcc
                cond_x = instr_x[5:4];
                off_x = instr_x[3:0];
                is_branch_x = 1'b1;
                is_halt_x = 1'b0;
                rd_valid_x = 1'b0;
            end
            default: begin // HALT
                is_branch_x = 1'b0;
                is_halt_x = 1'b1;
                rd_valid_x = 1'b0;
            end
        endcase
    end
    
    // Forwarding logic for X stage
    // Forward from W stage if W is writing to the register X is reading
    always @(*) begin
        if (opcode_x == 2'b01 && rd_valid_w && (rs_x == rd_w)) begin
            fwd_val_x = alu_result_w;
            fwd_valid_x = 1'b1;
        end else if (opcode_x == 2'b00 && rd_valid_w && (rd_x == rd_w)) begin
            // For ADDI, we might need to forward if rd is also rs? No, ADDI doesn't read rs.
            // But if ADDI writes to rd, and next ADDI reads rd? No, ADDI doesn't read rd.
            // Actually, ADDI reads nothing from RF. MOV reads rs.
            // So forwarding is only needed for MOV reading rs.
            fwd_val_x = alu_result_w;
            fwd_valid_x = 1'b1;
        end else begin
            fwd_valid_x = 1'b0;
            fwd_val_x = 8'b0;
        end
    end
    
    // Execute stage ALU and branch logic
    always @(*) begin
        // Default values
        alu_result_x = 8'b0;
        branch_taken_x = 1'b0;
        
        if (opcode_x == 2'b00) begin // ADDI
            // Read rd from RF, with forwarding
            reg signed [7:0] rd_val;
            if (fwd_valid_x && (rd_x == rd_w)) begin
                rd_val = fwd_val_x;
            end else begin
                rd_val = rf[rd_x];
            end
            // Sign extend imm
            reg signed [7:0] imm_sext;
            imm_sext = {{4{imm_x[3]}}, imm_x};
            alu_result_x = rd_val + imm_sext;
        end else if (opcode_x == 2'b01) begin // MOV
            // Read rs from RF, with forwarding
            reg signed [7:0] rs_val;
            if (fwd_valid_x && (rs_x == rd_w)) begin
                rs_val = fwd_val_x;
            end else begin
                rs_val = rf[rs_x];
            end
            alu_result_x = rs_val;
        end else if (opcode_x == 2'b10) begin // BRcc
            // Determine if branch is taken
            case (cond_x)
                2'b00: branch_taken_x = 1'b1; // Always
                2'b01: branch_taken_x = (r0_out == 8'sb0); // Zero
                2'b10: branch_taken_x = (r0_out < 8'sb0);  // Negative
                2'b11: branch_taken_x = (r0_out > 8'sb0);  // Positive
                default: branch_taken_x = 1'b0;
            endcase
            // Calculate branch target
            reg signed [7:0] off_sext;
            off_sext = {{4{off_x[3]}}, off_x};
            // PC for branch is pc_x + 1 + off
            // But we need to output the target for W stage
            // We'll compute the target in W stage or here
            // Let's store the target in alu_result_x for simplicity
            alu_result_x = pc_x + 1 + off_sext;
        end else if (opcode_x == 2'b11) begin // HALT
            // Nothing to compute
        end
    end
    
    // Pipeline registers update
    always @(posedge clk) begin
        if (rst) begin
            // Reset all pipeline stages
            pc_f <= 5'b0;
            instr_f <= 8'b0;
            
            pc_x <= 5'b0;
            instr_x <= 8'b0;
            opcode_x <= 2'b0;
            rd_x <= 2'b0;
            rs_x <= 2'b0;
            cond_x <= 2'b0;
            imm_x <= 4'b0;
            off_x <= 4'b0;
            alu_result_x <= 8'b0;
            is_branch_x <= 1'b0;
            branch_taken_x <= 1'b0;
            is_halt_x <= 1'b0;
            rd_valid_x <= 1'b0;
            
            pc_w <= 5'b0;
            instr_w <= 8'b0;
            opcode_w <= 2'b0;
            rd_w <= 2'b0;
            alu_result_w <= 8'b0;
            is_halt_w <= 1'b0;
            rd_valid_w <= 1'b0;
            
            // Reset register file
            rf[0] <= 8'sb0;
            rf[1] <= 8'sb0;
            rf[2] <= 8'sb0;
            rf[3] <= 8'sb0;
            
            // Reset outputs
            pc_out <= 5'b0;
            halt_out <= 1'b0;
            r0_out <= 8'sb0;
        end else begin
            // Update F stage
            if (!halt_out) begin
                pc_f <= pc_next;
                instr_f <= IMEM_INIT[pc_next*8 +: 8];
            end
            
            // Update X stage
            if (!halt_out) begin
                pc_x <= pc_f;
                instr_x <= instr_f;
                opcode_x <= instr_f[7:6];
                case (instr_f[7:6])
                    2'b00: begin
                        rd_x <= instr_f[5:4];
                        imm_x <= instr_f[3:0];
                    end
                    2'b01: begin
                        rd_x <= instr_f[5:4];
                        rs_x <= instr_f[3:2];
                    end
                    2'b10: begin
                        cond_x <= instr_f[5:4];
                        off_x <= instr_f[3:0];
                    end
                    default: begin
                        // HALT
                    end
                endcase
                is_branch_x <= (instr_f[7:6] == 2'b10);
                is_halt_x <= (instr_f[7:6] == 2'b11);
                rd_valid_x <= (instr_f[7:6] != 2'b10) && (instr_f[7:6] != 2'b11);
            end
            
            // Update W stage
            if (!halt_out) begin
                pc_w <= pc_x;
                instr_w <= instr_x;
                opcode_w <= opcode_x;
                rd_w <= rd_x;
                alu_result_w <= alu_result_x;
                is_halt_w <= is_halt_x;
                rd_valid_w <= rd_valid_x;
            end
            
            // Update register file
            if (rd_valid_w && !is_halt_w && !halt_out) begin
                rf[rd_w] <= alu_result_w;
            end
            
            // Update outputs
            pc_out <= pc_f;
            if (is_halt_w || halt_out) begin
                halt_out <= 1'b1;
            end else begin
                halt_out <= 1'b0;
            end
            
            // Update r0_out
            if (rd_valid_w && (rd_w == 2'b0) && !is_halt_w && !halt_out) begin
                r0_out <= alu_result_w;
            end else if (halt_out) begin
                // Keep r0_out as is
            end else begin
                // If no write to R0, keep previous value? 
                // The spec says "r0_out always reflects the most recently written value of R[0]"
                // So we need to keep it if not written
                // But we also need to handle the case where R0 is updated in RF
                // Let's just mirror rf[0] if R0 was written, otherwise keep r0_out
                // Actually, let's just mirror rf[0] always for simplicity, since rf[0] is the source of truth
                r0_out <= rf[0];
            end
            
            // Compute next PC
            if (is_branch_x && branch_taken_x) begin
                // Branch taken, target is in alu_result_x
                // But we need to handle the delay slot
                // The instruction after the branch (delay slot) executes
                // Then the PC jumps to the target
                // So pc_next should be the branch target
                pc_next = alu_result_x;
            end else begin
                // No branch or branch not taken
                pc_next = pc_f + 1;
            end
        end
    end

endmodule
```