Sim2Real API Reference¶

Sim2RealBridge ¶

Dual-mode bridge connecting MARL actions to the hypervisor backend.

Usage

bridge = Sim2RealBridge(mode='sim')   # training default
bridge = Sim2RealBridge(mode='real')  # evaluation with Docker

result = bridge.dispatch('ExploitEternalBlue', '10.0.1.3', 'Windows_Server_2016')
reward_delta = bridge.reward_delta(result)

Source code in netforge_rl\sim2real\bridge.py

class Sim2RealBridge:
    """
    Dual-mode bridge connecting MARL actions to the hypervisor backend.

    The backend driver is chosen once, at construction time: ``mode='real'``
    selects the Docker hypervisor when it reports itself available and
    otherwise falls back to the mock hypervisor; any other mode selects the
    mock hypervisor.

    Usage:
        bridge = Sim2RealBridge(mode='sim')   # training default
        bridge = Sim2RealBridge(mode='real')  # evaluation with Docker

        result = bridge.dispatch('ExploitEternalBlue', '10.0.1.3', 'Windows_Server_2016')
        reward_delta = bridge.reward_delta(result)
    """

    def __init__(self, mode: Literal['sim', 'real'] = 'sim') -> None:
        """
        Record the requested mode and build the matching driver.

        ``self.mode`` keeps the *requested* value even when real mode fell
        back to the mock driver.
        """
        self.mode = mode
        self._driver: BaseHypervisor = self._init_driver(mode)

    def dispatch(
        self,
        action_name: str,
        target_ip: str,
        target_os: str,
    ) -> HypervisorResult:
        """
        Execute a payload through the driver selected at construction time.

        The driver is neither re-checked nor swapped here: the fallback to
        the mock hypervisor happens only in ``_init_driver``. The driver's
        result is logged at DEBUG level and returned unchanged.
        """
        result = self._driver.dispatch(action_name, target_ip, target_os)
        logger.debug('Sim2RealBridge: %s', result)
        return result

    def reward_delta(self, result: HypervisorResult) -> float:
        """
        Map a HypervisorResult to an immediate scalar reward delta.

        This is *additive* on top of the scenario's standard reward — it
        represents additional friction from real-world exploit reliability.

        The checks run in priority order and the first match decides which
        ``_REWARD_DELTA`` entry is returned:
          1. ``result.success``                      -> 'success'
          2. ``result.return_code == 2``             -> 'failure_error'
          3. latency >= the noisy-latency threshold  -> 'failure_noisy'
          4. anything else                           -> 'failure_clean'
        """
        if result.success:
            outcome = 'success'
        elif result.return_code == 2:
            # Container/infrastructure error
            outcome = 'failure_error'
        elif result.latency_ms >= _NOISY_LATENCY_THRESHOLD_MS:
            outcome = 'failure_noisy'
        else:
            outcome = 'failure_clean'
        return _REWARD_DELTA[outcome]

    def teardown_all(self) -> None:
        """Destroy all active containers/sessions — call at episode end."""
        self._driver.teardown_all()

    def is_available(self) -> bool:
        """Return whatever the active driver reports from ``is_available()``."""
        return self._driver.is_available()

    def _init_driver(self, mode: str) -> BaseHypervisor:
        """
        Build the backend driver for ``mode``.

        ``'real'`` returns a DockerHypervisor if it reports itself available;
        otherwise a warning is logged and the mock hypervisor is used. Every
        other mode value yields the mock hypervisor. Driver modules are
        imported lazily so that only the selected backend is loaded.
        """
        if mode == 'real':
            from netforge_rl.sim2real.docker_hypervisor import DockerHypervisor

            driver = DockerHypervisor()
            if driver.is_available():
                return driver
            logger.warning(
                'Sim2RealBridge: real mode requested but Docker unavailable. '
                'Falling back to mock hypervisor.'
            )

        # Default (sim) and real-mode fallback share the same mock driver.
        from netforge_rl.sim2real.mock_hypervisor import MockHypervisor

        return MockHypervisor()

dispatch ¶

dispatch(
    action_name: str, target_ip: str, target_os: str
) -> HypervisorResult

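Execute the payload through the driver selected at construction time. The fallback to the mock hypervisor happens only once, in the constructor, when real mode is requested but Docker is unavailable; dispatch itself does not switch drivers.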

Source code in netforge_rl\sim2real\bridge.py

def dispatch(
    self,
    action_name: str,
    target_ip: str,
    target_os: str,
) -> HypervisorResult:
    """
    Execute a payload through the driver selected at construction time.

    The call is forwarded unchanged to ``self._driver.dispatch``; the
    driver's result is logged at DEBUG level and returned as-is. No driver
    availability check or mock fallback is performed here.
    """
    result = self._driver.dispatch(action_name, target_ip, target_os)
    logger.debug('Sim2RealBridge: %s', result)
    return result

reward_delta ¶

reward_delta(result: HypervisorResult) -> float

Map a HypervisorResult to an immediate scalar reward delta.

This is additive on top of the scenario's standard reward — it represents additional friction from real-world exploit reliability.

Source code in netforge_rl\sim2real\bridge.py

def reward_delta(self, result: HypervisorResult) -> float:
    """
    Translate a HypervisorResult into an immediate scalar reward delta.

    The delta is *additive* on top of the scenario's standard reward and
    models the extra friction of real-world exploit reliability.

    The first matching condition picks the ``_REWARD_DELTA`` entry:
    success, then return code 2, then latency at or above the noisy
    threshold, and finally the clean-failure default.
    """
    if result.success:
        outcome = 'success'
    elif result.return_code == 2:
        # Container/infrastructure error
        outcome = 'failure_error'
    elif result.latency_ms >= _NOISY_LATENCY_THRESHOLD_MS:
        outcome = 'failure_noisy'
    else:
        outcome = 'failure_clean'
    return _REWARD_DELTA[outcome]

teardown_all ¶

teardown_all() -> None

Destroy all active containers/sessions — call at episode end.

Source code in netforge_rl\sim2real\bridge.py

def teardown_all(self) -> None:
    """
    Destroy all active containers/sessions — call at episode end.

    Delegates directly to the active driver's ``teardown_all()``.
    """
    self._driver.teardown_all()

HypervisorResult `dataclass` ¶

Encapsulates the raw outcome of a payload dispatched against a target.

Both mock and real Docker drivers return this object so that the Sim2RealBridge can translate the outcome into environment reward deltas and SIEM telemetry strings in a uniform way.

Source code in netforge_rl\sim2real\hypervisor_base.py

@dataclass
class HypervisorResult:
    """
    Raw outcome of a single payload dispatched against a target.

    Mock and real Docker drivers both return this object, which lets the
    Sim2RealBridge turn the outcome into environment reward deltas and SIEM
    telemetry strings in a uniform way.
    """

    success: bool
    stdout: str
    return_code: int
    latency_ms: float
    action_name: str
    target_ip: str
    target_os: str
    # Real DockerHypervisor populates this; everything else keeps 'mock'.
    container_id: str = 'mock'

    def __repr__(self) -> str:
        """Return a compact one-line summary: status, action, target, RC, latency."""
        status = 'FAILED'
        if self.success:
            status = 'SUCCESS'
        head = f'<HypervisorResult [{status}] {self.action_name} → {self.target_ip} '
        tail = f'({self.target_os}) | RC={self.return_code} | {self.latency_ms:.1f}ms>'
        return head + tail

MockHypervisor ¶

Bases: BaseHypervisor

Zero-dependency mock hypervisor for training-speed execution.

Uses a curated JSON library of authentic Metasploit stdout strings and models probabilistic success rates adjusted for target OS compatibility. No containers are spawned; all results are synthesised locally.

Source code in netforge_rl\sim2real\mock_hypervisor.py

class MockHypervisor(BaseHypervisor):
    """
    Zero-dependency mock hypervisor for training-speed execution.

    Uses a curated JSON library of authentic Metasploit stdout strings and
    models probabilistic success rates adjusted for target OS compatibility.
    No containers are spawned; all results are synthesised locally.
    """

    def __init__(self, seed: int | None = None):
        """
        Create the private RNG and load the stdout library.

        Args:
            seed: Seed for the instance's ``random.Random``; ``None`` lets
                the RNG seed itself.
        """
        self._rng = random.Random(seed)
        library_path = Path(__file__).parent / 'payload_library.json'
        # Decode explicitly as UTF-8 so loading does not depend on the
        # platform's locale encoding (e.g. cp1252 on Windows).
        with open(library_path, encoding='utf-8') as f:
            self._library: dict = json.load(f)

    def dispatch(
        self,
        action_name: str,
        target_ip: str,
        target_os: str,
    ) -> HypervisorResult:
        """
        Synthesise a realistic payload result without spawning containers.

        Rolls success, samples a stdout string, and draws a Gaussian latency
        from the action's profile (floored at 50 ms and never below the time
        actually spent in this call). The return code is 0 on success and 1
        on failure; the reported latency is rounded to one decimal place.
        """
        t_start = time.perf_counter()

        success = self._roll_success(action_name, target_os)
        stdout = self._sample_stdout(action_name, success, target_ip)
        return_code = 0 if success else 1

        mean, std = _LATENCY_PROFILE.get(action_name, _DEFAULT_LATENCY)
        latency_ms = max(50.0, self._rng.gauss(mean, std))

        # Honour the real perf_counter so callers get a realistic wall-clock
        elapsed_ms = (time.perf_counter() - t_start) * 1000
        latency_ms = max(latency_ms, elapsed_ms)

        return HypervisorResult(
            success=success,
            stdout=stdout,
            return_code=return_code,
            latency_ms=round(latency_ms, 1),
            action_name=action_name,
            target_ip=target_ip,
            target_os=target_os,
            container_id='mock',
        )

    def teardown_all(self) -> None:
        """No-op — mock creates no resources to destroy."""
        pass

    def is_available(self) -> bool:
        """Always report available: the mock has no external dependency."""
        return True

    def _roll_success(self, action_name: str, target_os: str) -> bool:
        """
        Draw a success/failure outcome for ``action_name`` on ``target_os``.

        The base rate (0.50 for unknown actions) is shifted by the per-OS
        penalty for this action (0.0 if none is defined) and clamped to
        [0.02, 0.98], so no action is ever certain to succeed or fail.
        """
        base_rate = _DEFAULT_SUCCESS_RATES.get(action_name, 0.50)
        penalty = _OS_PENALTY.get(target_os, {}).get(action_name, 0.0)
        adjusted = max(0.02, min(0.98, base_rate + penalty))
        return self._rng.random() < adjusted

    def _sample_stdout(self, action_name: str, success: bool, target_ip: str) -> str:
        """
        Pick a stdout string for the outcome, with the target IP injected.

        Falls back to generic messages when the action is missing from the
        library or has no samples for the outcome. Otherwise a random
        template is chosen and its placeholder IPs ('10.0.1.3' and
        '10.0.0.7') are replaced by ``target_ip``.
        """
        bucket = self._library.get(action_name)
        if bucket is None:
            # Fallback for actions not explicitly in the library
            if success:
                return f'[*] {action_name} succeeded against {target_ip}\n[*] Session opened.'
            return (
                f'[-] {action_name} failed against {target_ip}\n[-] No session created.'
            )

        key = 'success' if success else 'failure'
        samples = bucket.get(key, [])
        if not samples:
            return f'[*] {action_name} {"completed" if success else "failed"}.'

        import re

        template = self._rng.choice(samples)
        # Inject actual target IP for realism. Both placeholders are replaced
        # in ONE pass: chaining two .replace() calls would re-scan the text
        # just inserted and corrupt targets such as '10.0.0.70', which
        # contains the second placeholder. A callable replacement keeps
        # target_ip literal (no backslash/group expansion).
        return re.sub(r'10\.0\.1\.3|10\.0\.0\.7', lambda _match: target_ip, template)

dispatch ¶

dispatch(
    action_name: str, target_ip: str, target_os: str
) -> HypervisorResult

Synthesise a realistic payload result without spawning containers.

Source code in netforge_rl\sim2real\mock_hypervisor.py

def dispatch(
    self,
    action_name: str,
    target_ip: str,
    target_os: str,
) -> HypervisorResult:
    """
    Build a realistic payload result locally; no container is started.

    Success is rolled first, then a stdout string is sampled, then a
    Gaussian latency is drawn from the action's latency profile. The
    reported latency is at least 50 ms and at least the wall-clock time
    spent in this call, rounded to one decimal place.
    """
    started = time.perf_counter()

    # Keep this draw order (success, stdout, latency) so that seeded runs
    # consume the RNG in the same sequence.
    succeeded = self._roll_success(action_name, target_os)
    output = self._sample_stdout(action_name, succeeded, target_ip)

    latency_mean, latency_std = _LATENCY_PROFILE.get(action_name, _DEFAULT_LATENCY)
    simulated_ms = self._rng.gauss(latency_mean, latency_std)

    # Honour the real perf_counter so callers get a realistic wall-clock
    wall_clock_ms = (time.perf_counter() - started) * 1000
    latency_ms = max(50.0, simulated_ms, wall_clock_ms)

    return HypervisorResult(
        success=succeeded,
        stdout=output,
        return_code=0 if succeeded else 1,
        latency_ms=round(latency_ms, 1),
        action_name=action_name,
        target_ip=target_ip,
        target_os=target_os,
        container_id='mock',
    )

teardown_all ¶

teardown_all() -> None

No-op — mock creates no resources to destroy.

Source code in netforge_rl\sim2real\mock_hypervisor.py

def teardown_all(self) -> None:
    """
    Do nothing: the mock creates no resources, so there is nothing to destroy.
    """

Sim2Real API Reference¶

Sim2RealBridge ¶

dispatch ¶

reward_delta ¶

teardown_all ¶

HypervisorResult dataclass ¶

MockHypervisor ¶

dispatch ¶

teardown_all ¶

HypervisorResult `dataclass` ¶