# Source code for avl_riscv_coverage._isa

from __future__ import annotations

import json

# Module-level registry mapping instruction mnemonic -> Encoding.
# It starts empty and is filled in by extract_isa(), which also stores a
# catch-all Encoding under the key None (match=0, mask=0).
# NOTE(review): the text below mentions filtering by base/extension, but no
# filtering is visible in this file - confirm where (or whether) it happens.
ISA: dict[str, Encoding] = {}
"""
Dictionary of all instruction encodings based on the reference instr_dict.json.
Filtered based on supported base and extensions
"""

class Encoding:
    """
    One instruction encoding (mnemonic, match/mask, size, base, extensions)
    together with class-level tables describing operand fields.

    The class-level tables classify operand field names (registers, signed
    and unsigned immediates) and give the ``(msb, lsb)`` bit range of each
    field inside the instruction word.
    """

    # Integer Registers
    _int_regs_: list[str] = [
        'zero', 'ra', 'sp', 'gp', 'tp', 't0', 't1', 't2',
        's0', 's1', 'a0', 'a1', 'a2', 'a3', 'a4', 'a5',
        'a6', 'a7', 's2', 's3', 's4', 's5', 's6', 's7',
        's8', 's9', 's10', 's11', 't3', 't4', 't5', 't6'
    ]
    """List of ABI integer registers"""

    # Floating-Point Registers
    _fp_regs_: list[str] = [
        'ft0', 'ft1', 'ft2', 'ft3', 'ft4', 'ft5', 'ft6', 'ft7',
        'fs0', 'fs1', 'fa0', 'fa1', 'fa2', 'fa3', 'fa4', 'fa5',
        'fa6', 'fa7', 'fs2', 'fs3', 'fs4', 'fs5', 'fs6', 'fs7',
        'fs8', 'fs9', 'fs10', 'fs11', 'ft8', 'ft9', 'ft10', 'ft11'
    ]
    """List of ABI floating point registers"""

    # Additional register categorization
    _compressed_regs_ = [
        # Both integer and FP compressed registers
        'rd_p', 'rs1_p', 'rs2_p', 'rd_rs1_p',
        'c_sreg1', 'c_sreg2',
    ]

    _full_width_regs_ = [
        # All full 5-bit registers (integer, FP, vector)
        'rd', 'rs1', 'rs2', 'rs3',
        'rd_n0', 'rs1_n0', 'rd_rs1_n0', 'rd_n2',
        'c_rs1_n0', 'c_rs2', 'c_rs2_n0',
        'vd', 'vs1', 'vs2', 'vs3',
    ]

    # Signed Immediates
    _signed_imms_ = [
        # Standard signed immediates
        'imm12',
        'imm12hi',      # S-type split (needs reconstruction)
        'imm12lo',      # S-type split (needs reconstruction)
        'imm20',        # U-type (technically unsigned but often treated as signed)
        'imm5',         # 5-bit signed
        'bimm12hi',     # B-type split (needs reconstruction)
        'bimm12lo',     # B-type split (needs reconstruction)
        'jimm20',       # J-type (needs special reconstruction)

        # Compressed signed immediates
        'c_imm12',
        'c_imm6hi',     # Split (needs reconstruction)
        'c_imm6lo',     # Split (needs reconstruction)
        'c_nzimm6hi',   # Split non-zero (needs reconstruction)
        'c_nzimm6lo',   # Split non-zero (needs reconstruction)
        'c_nzimm10hi',  # Split (needs reconstruction)
        'c_nzimm10lo',  # Split (needs reconstruction)
        'c_nzimm18hi',  # Split LUI (needs reconstruction)
        'c_nzimm18lo',  # Split LUI (needs reconstruction)
        'c_bimm9hi',    # Split branch (needs reconstruction)
        'c_bimm9lo',    # Split branch (needs reconstruction)

        # Vector signed immediate
        'simm5',
    ]

    # Unsigned Immediates
    _unsigned_imms_ = [
        # Shift amounts
        'shamtw',       # 5-bit word shift
        'shamtd',       # 6-bit double-word shift
        'bs',           # Bit select position

        # CSR
        'csr',          # 12-bit CSR address

        # Fence
        'pred',         # Fence predecessor
        'succ',         # Fence successor
        'fm',           # Fence mode

        # Atomic
        'aq',           # Acquire bit
        'rl',           # Release bit

        # Floating-point
        'rm',           # Rounding mode

        # Bit manipulation
        'rnum',         # Rotate amount

        # Vector unsigned immediates
        'zimm5',
        'zimm6hi',      # Split (needs reconstruction)
        'zimm6lo',      # Split (needs reconstruction)
        'zimm10',
        'zimm11',
        'vm',           # Vector mask bit

        # Compressed unsigned immediates
        'c_nzuimm6hi',  # Split (needs reconstruction)
        'c_nzuimm6lo',  # Split (needs reconstruction)
        'c_nzuimm10',
        'c_uimm1',
        'c_uimm2',
        'c_uimm7hi',    # Split (needs reconstruction)
        'c_uimm7lo',    # Split (needs reconstruction)
        'c_uimm8hi',    # Split (needs reconstruction)
        'c_uimm8lo',    # Split (needs reconstruction)
        'c_uimm8sp_s',
        'c_uimm8sphi',  # Split (needs reconstruction)
        'c_uimm8splo',  # Split (needs reconstruction)
        'c_uimm9sp_s',
        'c_uimm9sphi',  # Split (needs reconstruction)
        'c_uimm9splo',  # Split (needs reconstruction)
        'c_spimm',      # Zcmp stack pointer immediate
        'c_rlist',      # Zcmp register list (encoded value)
        'c_index',      # Custom/extension index

        # Memory operation types (custom/Zacas)
        'c_mop_t',
        'mop_r_t_21_20',
        'mop_r_t_27_26',
        'mop_r_t_30',
        'mop_rr_t_27_26',
        'mop_rr_t_30',
    ]

    # Split immediates that need reconstruction
    _split_immediates_ = {
        # Format: 'combined_name': ['hi_part', 'lo_part']
        'imm12': ['imm12hi', 'imm12lo'],        # S-type store
        'bimm12': ['bimm12hi', 'bimm12lo'],     # B-type branch
        'c_imm6': ['c_imm6hi', 'c_imm6lo'],
        'c_nzimm6': ['c_nzimm6hi', 'c_nzimm6lo'],
        'c_nzimm10': ['c_nzimm10hi', 'c_nzimm10lo'],
        'c_nzimm18': ['c_nzimm18hi', 'c_nzimm18lo'],
        'c_bimm9': ['c_bimm9hi', 'c_bimm9lo'],
        'c_nzuimm6': ['c_nzuimm6hi', 'c_nzuimm6lo'],
        'c_uimm7': ['c_uimm7hi', 'c_uimm7lo'],
        'c_uimm8': ['c_uimm8hi', 'c_uimm8lo'],
        'c_uimm8sp': ['c_uimm8sphi', 'c_uimm8splo'],
        'c_uimm9sp': ['c_uimm9sphi', 'c_uimm9splo'],
        'zimm6': ['zimm6hi', 'zimm6lo'],        # Vector
    }

    # Operand field name -> (msb, lsb) bit positions, both inclusive.
    # NOTE(review): several compressed/Zcmp entries are marked "Adjust as
    # needed" and have not been cross-checked here - verify the ranges against
    # the riscv-opcodes field table before relying on them.
    _field_positions_: dict[str, tuple[int, int]] = {
        # Atomic memory ordering
        'aq': (26, 26),
        'rl': (25, 25),

        # Branch immediates (split)
        'bimm12hi': (31, 25),
        'bimm12lo': (11, 7),

        # Bit manipulation
        'bs': (25, 20),

        # Compressed branch immediate (split)
        'c_bimm9hi': (12, 10),
        'c_bimm9lo': (6, 2),

        # Compressed jump immediate
        'c_imm12': (12, 2),

        # Compressed signed immediate (split)
        'c_imm6hi': (12, 12),
        'c_imm6lo': (6, 2),

        # Compressed custom fields
        'c_index': (12, 10),    # Adjust as needed for your extension
        'c_mop_t': (12, 10),    # Adjust as needed for your extension

        # Compressed non-zero signed immediate (split)
        'c_nzimm10hi': (12, 12),
        'c_nzimm10lo': (6, 2),

        # Compressed non-zero LUI immediate (split)
        'c_nzimm18hi': (12, 12),
        'c_nzimm18lo': (6, 2),

        # Compressed non-zero 6-bit signed immediate (split)
        'c_nzimm6hi': (12, 12),
        'c_nzimm6lo': (6, 2),

        # Compressed non-zero unsigned immediate
        'c_nzuimm10': (12, 2),

        # Compressed non-zero unsigned 6-bit immediate (split)
        'c_nzuimm6hi': (12, 12),
        'c_nzuimm6lo': (6, 2),

        # Compressed register list
        'c_rlist': (7, 4),      # Adjust as needed for Zcmp

        # Compressed registers (non-zero)
        'c_rs1_n0': (11, 7),
        'c_rs2': (6, 2),
        'c_rs2_n0': (6, 2),

        # Compressed stack pointer immediate
        'c_spimm': (12, 2),     # Adjust as needed for Zcmp

        # Compressed save/restore registers
        'c_sreg1': (9, 7),      # Adjust as needed for Zcmp
        'c_sreg2': (4, 2),      # Adjust as needed for Zcmp

        # Compressed small unsigned immediates
        'c_uimm1': (12, 12),    # Single bit
        'c_uimm2': (6, 5),      # 2 bits

        # Compressed unsigned 7-bit immediate (split)
        'c_uimm7hi': (12, 12),
        'c_uimm7lo': (6, 2),

        # Compressed unsigned 8-bit immediate (split)
        'c_uimm8hi': (12, 10),
        'c_uimm8lo': (6, 2),

        # Compressed stack-relative store
        'c_uimm8sp_s': (12, 2),

        # Compressed stack-relative unsigned 8-bit (split)
        'c_uimm8sphi': (12, 10),
        'c_uimm8splo': (6, 5),

        # Compressed stack-relative store 9-bit
        'c_uimm9sp_s': (12, 2),

        # Compressed stack-relative unsigned 9-bit (split)
        'c_uimm9sphi': (12, 10),
        'c_uimm9splo': (6, 4),

        # CSR
        'csr': (31, 20),

        # Fence mode
        'fm': (31, 28),

        # Standard immediates
        'imm12': (31, 20),
        'imm12hi': (31, 25),
        'imm12lo': (11, 7),
        'imm20': (31, 12),
        'imm5': (24, 20),

        # Jump immediate
        'jimm20': (31, 12),

        # Memory operation type fields (custom/Zacas)
        'mop_r_t_21_20': (21, 20),
        'mop_r_t_27_26': (27, 26),
        'mop_r_t_30': (30, 30),
        'mop_rr_t_27_26': (27, 26),
        'mop_rr_t_30': (30, 30),

        # Fence
        'pred': (27, 24),
        'succ': (23, 20),

        # Standard registers
        'rd': (11, 7),
        'rd_n0': (11, 7),
        'rd_n2': (11, 7),
        'rd_p': (9, 7),
        'rd_rs1_n0': (11, 7),
        'rd_rs1_p': (9, 7),

        # Floating-point rounding mode
        'rm': (14, 12),

        # Rotate amount
        'rnum': (23, 20),

        # Source registers
        'rs1': (19, 15),
        'rs1_n0': (19, 15),
        'rs1_p': (9, 7),
        'rs2': (24, 20),
        'rs2_p': (4, 2),
        'rs3': (31, 27),

        # Shift amounts
        'shamtd': (25, 20),
        'shamtw': (24, 20),

        # Vector signed 5-bit immediate
        'simm5': (19, 15),

        # Vector registers
        'vd': (11, 7),
        'vm': (25, 25),
        'vs1': (19, 15),
        'vs2': (24, 20),
        'vs3': (11, 7),

        # Vector zero-extended immediates
        'zimm10': (29, 20),
        'zimm11': (30, 20),
        'zimm5': (19, 15),

        # Vector 6-bit zero-extended immediate (split)
        'zimm6hi': (26, 26),
        'zimm6lo': (19, 15),
    }

    def __init__(self, mnemonic : str, match : int, mask : int, size : int, base : str, extensions : list[str]) -> None:
        """
        Constructor

        :param mnemonic: Instruction mnemonic
        :type mnemonic: str
        :param match: Opcode encoding bits
        :type match: int
        :param mask: Opcode encoding bit mask
        :type mask: int
        :param size: Instruction size in bytes
        :type size: int
        :param base: Instruction base (i.e. RV32/RV64)
        :type base: str
        :param extensions: List of extensions
        :type extensions: list[str]
        """
        self.mnemonic = mnemonic
        self.match = match
        self.mask = mask
        self.size = size
        self.base = base
        self.extensions = extensions
        # Operand name -> descriptor dict; starts empty and is filled by the
        # code that builds the ISA table.
        self.operands = {}

    def __str__(self) -> str:
        """
        Return a string representation of the Encoding.

        Every instance attribute is printed on its own ``name : value`` line
        between two 70-character ``=`` rules; list values are comma-joined.

        :return: String representation of the Encoding.
        :rtype: str
        """
        rule = "=" * 70 + "\n"
        rows = []
        for k, v in self.__dict__.items():
            shown = ','.join(v) if isinstance(v, list) else v
            rows.append(f"{k:<16} : {shown}\n")
        return rule + "".join(rows) + rule

    @classmethod
    def get_mnemonic(cls, encoding : int) -> str:
        """
        Extract mnemonic from the encoding

        Walks ``ISA`` in insertion order and returns the first key whose
        ``mask``/``match`` pair accepts the encoding. An entry with mask 0 and
        match 0 (such as the ``None`` catch-all added by ``extract_isa``)
        accepts every encoding, so with that entry present the result may be
        ``None`` rather than a string.

        :param encoding: Post masking encoding
        :type encoding: int
        :return: String mnemonic
        :rtype: str
        :raises ValueError: If no entry in ``ISA`` matches
        """
        for mnemonic, instr_data in ISA.items():
            # Check if instruction matches
            if (encoding & instr_data.mask) == instr_data.match:
                return mnemonic

        raise ValueError(f"Failed to get mnemonic for {encoding:x}")

    @classmethod
    def get_encoding(cls, encoding : int) -> Encoding:
        """
        Look up the Encoding object for an instruction word

        :param encoding: Post masking encoding
        :type encoding: int
        :return: Encoding object
        :rtype: Encoding
        :raises ValueError: If no entry in ``ISA`` matches
        """
        return ISA[cls.get_mnemonic(encoding)]

    @classmethod
    def get_operand(cls, encoding : int, name : str) -> int:
        """
        Extract operand value from encoding

        The value is the raw, unsigned content of the field's bit range; no
        sign extension or split-immediate reconstruction is applied.

        :param encoding: Post masking encoding
        :type encoding: int
        :param name: name of operand
        :type name: str
        :return: Operand Value
        :rtype: int
        :raises KeyError: If ``name`` has no entry in ``_field_positions_``
        """
        lsb = cls._field_positions_[name][1]
        mask = (1 << cls._field_width(name)) - 1
        return (encoding >> lsb) & mask

    @classmethod
    def _field_width(cls, name : str) -> int:
        """
        Return the number of bits covered by a field's ``(msb, lsb)`` range.

        :param name: Operand field name
        :type name: str
        :return: Field width in bits
        :rtype: int
        """
        msb, lsb = cls._field_positions_[name]
        return msb - lsb + 1

    def _analyze_operand(self, name : str) -> dict[str, int | str]:
        """
        Analyse an operand and describe its kind and value range

        ``min`` and ``max`` are inclusive bounds on the raw field value.

        :param name: Operand name
        :type name: str
        :return: Dict with keys ``type``, ``min`` and ``max``
        :rtype: dict
        :raises ValueError: If the operand name is not in any category
        """
        # === REGULAR INTEGER REGISTER OPERANDS ===
        if name in self._full_width_regs_:
            # A 5-bit register field holds 0..31; "_n0" variants exclude 0.
            return {
                "type": "reg",
                "min": 1 if name.endswith("_n0") else 0,
                "max": 31,
            }

        # === COMPRESSED REGISTER OPERANDS ===
        if name in self._compressed_regs_:
            return {
                "type": "int_reg",
                "min": 8,
                "max": 15,
            }

        # === SIGNED IMMEDIATE OPERANDS ===
        if name in self._signed_imms_:
            half_range = 1 << (self._field_width(name) - 1)
            return {
                "type": "imm",
                "min": -half_range,
                "max": half_range - 1,
            }

        # === UNSIGNED IMMEDIATE OPERANDS ===
        if name in self._unsigned_imms_:
            return {
                "type": "imm",
                "min": 0,
                "max": (1 << self._field_width(name)) - 1,
            }

        # === UNKNOWN OPERAND ===
        raise ValueError(f"Unknown operand : {name}")
def extract_isa(ref : str) -> None:
    """
    Extract ISAs from instr_dict.json

    Each entry of the JSON file becomes an ``Encoding`` stored in ``ISA`` under
    its mnemonic (underscores in the JSON key are replaced by dots). A final
    catch-all entry keyed ``None`` (match 0, mask 0) is always placed last so
    that it is only reached when no real instruction matches.

    :param ref: Location of instr_dict.json
    :type ref: str
    """
    # Read reference json
    with open(ref, encoding="utf-8") as f:
        instr_dict = json.load(f)

    # A catch-all left over from an earlier call would keep its old position
    # in the dict and shadow every instruction added after it (its mask of 0
    # matches anything), so remove it here and re-add it at the very end.
    ISA.pop(None, None)

    # Process instructions
    for key, entry in instr_dict.items():
        mnemonic = key.replace("_", ".")

        # Extension tags look like "<base>_<ext>"; all tags of one instruction
        # are expected to share a single base.
        bases = {tag.split("_", 1)[0].upper() for tag in entry["extension"]}
        assert len(bases) == 1, f"{mnemonic}: expected exactly one base, got {sorted(bases)}"
        base = next(iter(bases))
        extensions = [tag.split("_", 1)[1].upper() for tag in entry["extension"]]

        # Extract match and mask
        match = int(entry["match"], 16)
        mask = int(entry["mask"], 16)

        # Extract size (compressed): only encodings whose two lowest bits are
        # "11" are treated as 4-byte instructions
        size = 4 if entry["encoding"].endswith("11") else 2

        enc = Encoding(mnemonic, match, mask, size, base, extensions)

        # Extract operands
        for operand in entry["variable_fields"]:
            enc.operands[operand] = enc._analyze_operand(operand)

        ISA[mnemonic] = enc

    # Create an entry for unknown encodings
    ISA[None] = Encoding(None, 0, 0, 4, "RV", [])
# Public names exported by `from <module> import *`.
__all__ = [ "ISA", "Encoding", "extract_isa", ]