from __future__ import annotations
import json
# Module-level registry keyed by instruction mnemonic; each value is the
# Encoding for that mnemonic. Encoding.get_mnemonic() scans it in insertion
# order and Encoding.get_encoding() indexes it by mnemonic.
# It starts empty here; whatever fills it is not visible in this part of the file.
ISA: dict[str, Encoding] = {}
"""
Dictionary of all instruction encodings based on the reference instr_dict.json.
Filtered based on supported base and extensions
"""
class Encoding:
    """
    One instruction encoding plus class-level operand-field metadata.

    Instances carry the per-instruction data given to the constructor
    (mnemonic, match/mask pattern, size, base and extensions). The class
    attributes classify operand field names (registers, signed/unsigned
    immediates, split immediates) and record each field's bit positions as
    inclusive ``(msb, lsb)`` pairs.
    """

    # Integer Registers
    _int_regs_: list[str] = [
        'zero', 'ra', 'sp', 'gp', 'tp', 't0', 't1', 't2',
        's0', 's1', 'a0', 'a1', 'a2', 'a3', 'a4', 'a5',
        'a6', 'a7', 's2', 's3', 's4', 's5', 's6', 's7',
        's8', 's9', 's10', 's11', 't3', 't4', 't5', 't6'
    ]
    """List of ABI integer registers"""

    # Floating-Point Registers
    _fp_regs_: list[str] = [
        'ft0', 'ft1', 'ft2', 'ft3', 'ft4', 'ft5', 'ft6', 'ft7',
        'fs0', 'fs1', 'fa0', 'fa1', 'fa2', 'fa3', 'fa4', 'fa5',
        'fa6', 'fa7', 'fs2', 'fs3', 'fs4', 'fs5', 'fs6', 'fs7',
        'fs8', 'fs9', 'fs10', 'fs11', 'ft8', 'ft9', 'ft10', 'ft11'
    ]
    """List of ABI floating point registers"""

    # Additional register categorization
    _compressed_regs_: list[str] = [
        # Both integer and FP compressed registers
        'rd_p',
        'rs1_p',
        'rs2_p',
        'rd_rs1_p',
        'c_sreg1',
        'c_sreg2',
    ]

    _full_width_regs_: list[str] = [
        # All full 5-bit registers (integer, FP, vector)
        'rd',
        'rs1',
        'rs2',
        'rs3',
        'rd_n0',
        'rs1_n0',
        'rd_rs1_n0',
        'rd_n2',
        'c_rs1_n0',
        'c_rs2',
        'c_rs2_n0',
        'vd',
        'vs1',
        'vs2',
        'vs3',
    ]

    # Signed Immediates
    _signed_imms_: list[str] = [
        # Standard signed immediates
        'imm12',
        'imm12hi',      # S-type split (needs reconstruction)
        'imm12lo',      # S-type split (needs reconstruction)
        'imm20',        # U-type (technically unsigned but often treated as signed)
        'imm5',         # 5-bit signed
        'bimm12hi',     # B-type split (needs reconstruction)
        'bimm12lo',     # B-type split (needs reconstruction)
        'jimm20',       # J-type (needs special reconstruction)
        # Compressed signed immediates
        'c_imm12',
        'c_imm6hi',     # Split (needs reconstruction)
        'c_imm6lo',     # Split (needs reconstruction)
        'c_nzimm6hi',   # Split non-zero (needs reconstruction)
        'c_nzimm6lo',   # Split non-zero (needs reconstruction)
        'c_nzimm10hi',  # Split (needs reconstruction)
        'c_nzimm10lo',  # Split (needs reconstruction)
        'c_nzimm18hi',  # Split LUI (needs reconstruction)
        'c_nzimm18lo',  # Split LUI (needs reconstruction)
        'c_bimm9hi',    # Split branch (needs reconstruction)
        'c_bimm9lo',    # Split branch (needs reconstruction)
        # Vector signed immediate
        'simm5',
    ]

    # Unsigned Immediates
    _unsigned_imms_: list[str] = [
        # Shift amounts
        'shamtw',       # 5-bit word shift
        'shamtd',       # 6-bit double-word shift
        'bs',           # Bit select position
        # CSR
        'csr',          # 12-bit CSR address
        # Fence
        'pred',         # Fence predecessor
        'succ',         # Fence successor
        'fm',           # Fence mode
        # Atomic
        'aq',           # Acquire bit
        'rl',           # Release bit
        # Floating-point
        'rm',           # Rounding mode
        # Bit manipulation
        'rnum',         # Rotate amount
        # Vector unsigned immediates
        'zimm5',
        'zimm6hi',      # Split (needs reconstruction)
        'zimm6lo',      # Split (needs reconstruction)
        'zimm10',
        'zimm11',
        'vm',           # Vector mask bit
        # Compressed unsigned immediates
        'c_nzuimm6hi',  # Split (needs reconstruction)
        'c_nzuimm6lo',  # Split (needs reconstruction)
        'c_nzuimm10',
        'c_uimm1',
        'c_uimm2',
        'c_uimm7hi',    # Split (needs reconstruction)
        'c_uimm7lo',    # Split (needs reconstruction)
        'c_uimm8hi',    # Split (needs reconstruction)
        'c_uimm8lo',    # Split (needs reconstruction)
        'c_uimm8sp_s',
        'c_uimm8sphi',  # Split (needs reconstruction)
        'c_uimm8splo',  # Split (needs reconstruction)
        'c_uimm9sp_s',
        'c_uimm9sphi',  # Split (needs reconstruction)
        'c_uimm9splo',  # Split (needs reconstruction)
        'c_spimm',      # Zcmp stack pointer immediate
        'c_rlist',      # Zcmp register list (encoded value)
        'c_index',      # Custom/extension index
        # mop_* selector fields. The original note says "custom/Zacas"; the
        # names suggest may-be-operation selectors instead — TODO confirm.
        'c_mop_t',
        'mop_r_t_21_20',
        'mop_r_t_27_26',
        'mop_r_t_30',
        'mop_rr_t_27_26',
        'mop_rr_t_30',
    ]

    # Split immediates that need reconstruction
    _split_immediates_: dict[str, list[str]] = {
        # Format: 'combined_name': ['hi_part', 'lo_part']
        'imm12': ['imm12hi', 'imm12lo'],            # S-type store
        'bimm12': ['bimm12hi', 'bimm12lo'],         # B-type branch
        'c_imm6': ['c_imm6hi', 'c_imm6lo'],
        'c_nzimm6': ['c_nzimm6hi', 'c_nzimm6lo'],
        'c_nzimm10': ['c_nzimm10hi', 'c_nzimm10lo'],
        'c_nzimm18': ['c_nzimm18hi', 'c_nzimm18lo'],
        'c_bimm9': ['c_bimm9hi', 'c_bimm9lo'],
        'c_nzuimm6': ['c_nzuimm6hi', 'c_nzuimm6lo'],
        'c_uimm7': ['c_uimm7hi', 'c_uimm7lo'],
        'c_uimm8': ['c_uimm8hi', 'c_uimm8lo'],
        'c_uimm8sp': ['c_uimm8sphi', 'c_uimm8splo'],
        'c_uimm9sp': ['c_uimm9sphi', 'c_uimm9splo'],
        'zimm6': ['zimm6hi', 'zimm6lo'],            # Vector
    }

    # Bit positions of every operand field as an inclusive (msb, lsb) pair.
    # NOTE(review): entries tagged "Adjust as needed" are placeholders by the
    # original author's own account, and some compressed immediates overlap
    # register fields (e.g. 'c_uimm7lo' (6, 2) vs 'rs2_p' (4, 2)). Values are
    # kept unchanged here; verify them against the reference field table.
    _field_positions_: dict[str, tuple[int, int]] = {
        # Atomic memory ordering
        'aq': (26, 26),
        'rl': (25, 25),
        # Branch immediates (split)
        'bimm12hi': (31, 25),
        'bimm12lo': (11, 7),
        # Bit manipulation
        'bs': (25, 20),
        # Compressed branch immediate (split)
        'c_bimm9hi': (12, 10),
        'c_bimm9lo': (6, 2),
        # Compressed jump immediate
        'c_imm12': (12, 2),
        # Compressed signed immediate (split)
        'c_imm6hi': (12, 12),
        'c_imm6lo': (6, 2),
        # Compressed custom fields
        'c_index': (12, 10),    # Adjust as needed for your extension
        'c_mop_t': (12, 10),    # Adjust as needed for your extension
        # Compressed non-zero signed immediate (split)
        'c_nzimm10hi': (12, 12),
        'c_nzimm10lo': (6, 2),
        # Compressed non-zero LUI immediate (split)
        'c_nzimm18hi': (12, 12),
        'c_nzimm18lo': (6, 2),
        # Compressed non-zero 6-bit signed immediate (split)
        'c_nzimm6hi': (12, 12),
        'c_nzimm6lo': (6, 2),
        # Compressed non-zero unsigned immediate
        'c_nzuimm10': (12, 2),
        # Compressed non-zero unsigned 6-bit immediate (split)
        'c_nzuimm6hi': (12, 12),
        'c_nzuimm6lo': (6, 2),
        # Compressed register list
        'c_rlist': (7, 4),      # Adjust as needed for Zcmp
        # Compressed registers (non-zero)
        'c_rs1_n0': (11, 7),
        'c_rs2': (6, 2),
        'c_rs2_n0': (6, 2),
        # Compressed stack pointer immediate
        'c_spimm': (12, 2),     # Adjust as needed for Zcmp
        # Compressed save/restore registers
        'c_sreg1': (9, 7),      # Adjust as needed for Zcmp
        'c_sreg2': (4, 2),      # Adjust as needed for Zcmp
        # Compressed small unsigned immediates
        'c_uimm1': (12, 12),    # Single bit
        'c_uimm2': (6, 5),      # 2 bits
        # Compressed unsigned 7-bit immediate (split)
        'c_uimm7hi': (12, 12),
        'c_uimm7lo': (6, 2),
        # Compressed unsigned 8-bit immediate (split)
        'c_uimm8hi': (12, 10),
        'c_uimm8lo': (6, 2),
        # Compressed stack-relative store
        'c_uimm8sp_s': (12, 2),
        # Compressed stack-relative unsigned 8-bit (split)
        'c_uimm8sphi': (12, 10),
        'c_uimm8splo': (6, 5),
        # Compressed stack-relative store 9-bit
        'c_uimm9sp_s': (12, 2),
        # Compressed stack-relative unsigned 9-bit (split)
        'c_uimm9sphi': (12, 10),
        'c_uimm9splo': (6, 4),
        # CSR
        'csr': (31, 20),
        # Fence mode
        'fm': (31, 28),
        # Standard immediates
        'imm12': (31, 20),
        'imm12hi': (31, 25),
        'imm12lo': (11, 7),
        'imm20': (31, 12),
        'imm5': (24, 20),
        # Jump immediate
        'jimm20': (31, 12),
        # mop_* selector fields (see the note in _unsigned_imms_)
        'mop_r_t_21_20': (21, 20),
        'mop_r_t_27_26': (27, 26),
        'mop_r_t_30': (30, 30),
        'mop_rr_t_27_26': (27, 26),
        'mop_rr_t_30': (30, 30),
        # Fence
        'pred': (27, 24),
        'succ': (23, 20),
        # Standard registers
        'rd': (11, 7),
        'rd_n0': (11, 7),
        'rd_n2': (11, 7),
        'rd_p': (9, 7),
        'rd_rs1_n0': (11, 7),
        'rd_rs1_p': (9, 7),
        # Floating-point rounding mode
        'rm': (14, 12),
        # Rotate amount
        'rnum': (23, 20),
        # Source registers
        'rs1': (19, 15),
        'rs1_n0': (19, 15),
        'rs1_p': (9, 7),
        'rs2': (24, 20),
        'rs2_p': (4, 2),
        'rs3': (31, 27),
        # Shift amounts
        'shamtd': (25, 20),
        'shamtw': (24, 20),
        # Vector signed 5-bit immediate
        'simm5': (19, 15),
        # Vector registers
        'vd': (11, 7),
        'vm': (25, 25),
        'vs1': (19, 15),
        'vs2': (24, 20),
        'vs3': (11, 7),
        # Vector zero-extended immediates
        'zimm10': (29, 20),
        'zimm11': (30, 20),
        'zimm5': (19, 15),
        # Vector 6-bit zero-extended immediate (split)
        'zimm6hi': (26, 26),
        'zimm6lo': (19, 15),
    }

    def __init__(self, mnemonic: str, match: int, mask: int, size: int, base: str, extensions: list[str]) -> None:
        """
        Constructor

        :param mnemonic: Instruction mnemonic
        :type mnemonic: str
        :param match: Opcode encoding bits
        :type match: int
        :param mask: Opcode encoding bit mask
        :type mask: int
        :param size: Instruction size in bytes
        :type size: int
        :param base: Instruction base (i.e. RV32/RV64)
        :type base: str
        :param extensions: List of extensions
        :type extensions: list[str]
        """
        self.mnemonic = mnemonic
        self.match = match
        self.mask = mask
        self.size = size
        self.base = base
        self.extensions = extensions
        # Starts empty; nothing in this class fills it.
        self.operands = {}

    def __str__(self) -> str:
        """
        Return a string representation of the Encoding.

        Every instance attribute is printed as ``name : value`` between two
        70-character rules; list values are comma-joined.

        :return: String representation of the Encoding.
        :rtype: str
        """
        rule = "=" * 70
        lines = [rule]
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                # str() each element so a non-string entry cannot break join().
                value = ",".join(map(str, value))
            lines.append(f"{key:<16} : {value}")
        lines.append(rule)
        return "\n".join(lines) + "\n"

    @classmethod
    def get_mnemonic(cls, encoding: int) -> str:
        """
        Extract the mnemonic from an encoding.

        ``ISA`` is scanned in insertion order and the first entry whose
        ``mask``/``match`` pair fits the encoding is returned.

        :param encoding: Post masking encoding
        :type encoding: int
        :return: String mnemonic
        :rtype: str
        :raises ValueError: If no entry in ``ISA`` matches the encoding
        """
        for mnemonic, instr_data in ISA.items():
            # Check if instruction matches
            if (encoding & instr_data.mask) == instr_data.match:
                return mnemonic
        raise ValueError(f"Failed to get mnemonic for {encoding:x}")

    @classmethod
    def get_encoding(cls, encoding: int) -> Encoding:
        """
        Look up the Encoding object that matches an encoding.

        :param encoding: Post masking encoding
        :type encoding: int
        :return: Encoding object
        :rtype: Encoding
        :raises ValueError: If no entry in ``ISA`` matches the encoding
        """
        return ISA[cls.get_mnemonic(encoding)]

    @classmethod
    def get_operand(cls, encoding: int, name: str) -> int:
        """
        Extract operand value from encoding

        The raw bits of the field are returned as an unsigned integer; no
        sign extension or split-immediate reconstruction is performed.

        :param encoding: Post masking encoding
        :type encoding: int
        :param name: name of operand
        :type name: str
        :return: Operand Value
        :rtype: int
        :raises KeyError: If ``name`` has no entry in ``_field_positions_``
        """
        msb, lsb = cls._field_positions_[name]
        width = msb - lsb + 1
        return (encoding >> lsb) & ((1 << width) - 1)

    @classmethod
    def _field_width(cls, name: str) -> int:
        """Return the number of bits spanned by field ``name``."""
        msb, lsb = cls._field_positions_[name]
        return msb - lsb + 1

    def _analyze_operand(self, name: str) -> dict[str, int | str]:
        """
        Analyse an operand and report its kind and inclusive value range.

        :param name: Operand name
        :type name: str
        :return: Dict with keys ``"type"``, ``"min"`` and ``"max"``
        :rtype: dict
        :raises ValueError: If ``name`` is in none of the operand categories
        """
        # === FULL-WIDTH (5-BIT) REGISTER OPERANDS ===
        if name in Encoding._full_width_regs_:
            # A 5-bit field holds 0..31, so the inclusive upper bound is 31.
            # Names ending in "_n0" exclude register 0. Other constraint
            # suffixes (e.g. "rd_n2") are not expressed by a min/max range.
            return {
                "type": "reg",
                "min": 1 if name.endswith("_n0") else 0,
                "max": 31,
            }

        # === COMPRESSED REGISTER OPERANDS ===
        if name in Encoding._compressed_regs_:
            return {
                "type": "int_reg",
                "min": 8,
                "max": 15,
            }

        # === SIGNED IMMEDIATE OPERANDS ===
        if name in Encoding._signed_imms_:
            # Two's-complement range of the field as stored; halves of a
            # split immediate are each ranged on their own width.
            half_range = 1 << (Encoding._field_width(name) - 1)
            return {
                "type": "imm",
                "min": -half_range,
                "max": half_range - 1,
            }

        # === UNSIGNED IMMEDIATE OPERANDS ===
        if name in Encoding._unsigned_imms_:
            return {
                "type": "imm",
                "min": 0,
                "max": (1 << Encoding._field_width(name)) - 1,
            }

        # === UNKNOWN OPERAND ===
        raise ValueError(f"Unknown operand : {name}")
# Names exported by a star-import of this module.
# NOTE(review): "extract_isa" is not defined in the part of the file visible
# here — confirm it exists, otherwise a star-import raises AttributeError.
__all__ = [
"ISA",
"Encoding",
"extract_isa",
]