# Source code for avl_riscv_coverage._elf

import re
import subprocess
import sys
from pathlib import Path

from elftools.elf.elffile import ELFFile

from ._instr import Instruction

# Maps an instruction's address (the integer parsed from the objdump listing in
# parse_elf) to the Instruction object built for it.
INSTRUCTIONS: dict[int, Instruction] = {}
"""Dictionary of all instructions based on contents of elf file"""

# Maps an extension name to its version string as parsed from the
# .riscv.attributes section. Also holds a 'base' entry ('RV32' or 'RV64').
# A version is None when the architecture string does not give one.
EXTENSIONS: dict[str, str] = {}
"""Dictionary of all available extensions and their version based on properties of elf file"""


def _parse_arch_string_(arch_string : str) -> dict[str, str]:
    """
    Parse RISC-V architecture string like 'rv64i2p1_m2p0_a2p1...'

    :param arch_string: Architectural definition string extracted from elf
    :type arch_string: str
    :returns Dictionary of extensions and versions
    :rtype : dict[str, str]
    """

    extensions = {}

    if not arch_string.startswith('rv'):
        return extensions

    # Extract base (rv32/rv64)
    if arch_string.startswith('rv64'):
        base = 'RV64'
        rest = arch_string[4:]
    elif arch_string.startswith('rv32'):
        base = 'RV32'
        rest = arch_string[4:]
    else:
        return extensions

    extensions['base'] = base

    # Split by underscore
    parts = rest.split('_')

    # Parse each extension with version
    # Format: extension_name + version (e.g., i2p1, m2p0, zicsr2p0)
    for part in parts:
        if not part:
            continue

        # Match pattern: letters followed by optional version (digit+p+digit)
        match = re.match(r'^([a-z]+)(\d+p\d+)?$', part, re.IGNORECASE)
        if match:
            ext_name = match.group(1).upper()
            version = match.group(2) if match.group(2) else None

            # Handle special case: 'g' expands to imafd + zicsr + zifencei
            if ext_name == 'G':
                extensions['I'] = version
                extensions['M'] = version
                extensions['A'] = version
                extensions['F'] = version
                extensions['D'] = version
                extensions['Zicsr'] = version
                extensions['Zifencei'] = version
            else:
                extensions[ext_name] = version

    return extensions

def _parse_riscv_attribes_(data : str) -> dict[str, str]:
    """
    Quick parser specifically for your data format

    :param data: Attributes data extracted from elf file
    :type data: str
    :returns Dictionary of extensions and versions
    :rtype : dict[str, str]
    """

    # Find the architecture string (starts with 'rv')
    arch_start = data.find(b'rv')
    if arch_start == -1:
        return None

    # Find the null terminator after the arch string
    arch_end = data.find(b'\x00', arch_start)
    if arch_end == -1:
        arch_end = len(data)

    arch_string = data[arch_start:arch_end].decode('ascii')

    return _parse_arch_string_(arch_string)

def parse_elf(elfpath : Path, *, objdump : str = 'riscv64-unknown-elf-objdump') -> None:
    """
    Parse given elf file. Extract class and instructions

    Fills the module-level ``EXTENSIONS`` dictionary from the
    ``.riscv.attributes`` section, then disassembles the file with objdump and
    fills the module-level ``INSTRUCTIONS`` dictionary with one ``Instruction``
    per disassembled line, keyed by address. Finally each instruction is
    linked to the instruction found at ``pc + size`` (or ``None``).

    If objdump cannot be run, a message is printed to stderr and the process
    exits with status 1.

    NOTE(review): both dictionaries are updated in place and never cleared
    here, so entries from an earlier call remain - confirm this is intended.

    :param elfpath: path to elf file
    :type elfpath: path
    :param objdump: name or path of the objdump executable to run
    :type objdump: str
    :raises ValueError: if the file is not a RISC-V binary, has no
                        ``.riscv.attributes`` section, or that section holds
                        no supported architecture string
    """

    with open(elfpath, "rb") as f:
        elf = ELFFile(f)

        # Check if it's RISC-V
        if elf.header['e_machine'] != 'EM_RISCV':
            raise ValueError (f"Not a RISC-V binary: {elf.header['e_machine']}")

        # Extract Extensions
        attrs_section = elf.get_section_by_name('.riscv.attributes')
        if not attrs_section:
            raise ValueError("No .riscv.attributes section found")

        # The helper yields nothing usable (None or an empty dict) when the
        # section has no rv32/rv64 architecture string; report that explicitly
        # instead of failing later with a TypeError/KeyError.
        extensions = _parse_riscv_attribes_(attrs_section.data())
        if not extensions:
            raise ValueError("No supported architecture string found in .riscv.attributes")
        EXTENSIONS.update(extensions)

        # Discover mode
        if EXTENSIONS["base"] not in ["RV64", "RV32"]:
            raise ValueError(f"Unknown base architecture {EXTENSIONS['base']}")

    # Run objdump to extract instructions
    try:
        result = subprocess.run(
            [objdump, '-d', elfpath],
            capture_output=True,
            text=True,
            check=True
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        print(f"Could not run objdump: {e}", file=sys.stderr)
        sys.exit(1)

    # Parse: " 2000000342: 0d0075d7 vsetvli a1,zero,e32,m1,ta,ma"
    line_pattern = re.compile(r'\s*([0-9a-f]+):\s+([0-9a-f]+)\s+(.+)')
    for line in result.stdout.splitlines():
        match = line_pattern.match(line)
        if match:
            addr = int(match.group(1), 16)
            bytes_hex = int(match.group(2), 16)
            INSTRUCTIONS[addr] = Instruction(addr, bytes_hex)

    # Link instructions to prev / next
    for instr in INSTRUCTIONS.values():
        instr.link(INSTRUCTIONS.get(instr.pc + instr.size))


# Names exported by ``from ... import *``
__all__ = [
    "INSTRUCTIONS",
    "EXTENSIONS",
    "parse_elf",
]