Skip to content

Environments

Interfaces & Protocols

interfaces

TradingEnvProtocol

Bases: Protocol

Protocol defining the interface for trading environments.

Source code in src/quantrl_lab/environments/core/interfaces.py
class TradingEnvProtocol(Protocol):
    """
    Structural interface that trading environments are expected to satisfy.

    The strategy base classes are annotated against this protocol, so any
    object exposing the attributes and methods below can be handed to them.
    """

    # --- Required attributes ---------------------------------------------
    data: np.ndarray
    current_step: int
    price_column_index: int
    window_size: int
    action_space: gym.Space
    observation_space: gym.Space

    # --- Required methods ------------------------------------------------
    def step(self, action: Any) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """Apply ``action``; return (observation, reward, terminated, truncated, info)."""

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
        """Begin a new episode; return (observation, info)."""

    def render(self, mode: str = "human"):
        """Render the current state in the requested ``mode``."""

    def close(self):
        """Shut the environment down."""

BaseActionStrategy

Bases: ABC

An abstract base class for defining action spaces and handling agent actions.

Source code in src/quantrl_lab/environments/core/interfaces.py
class BaseActionStrategy(ABC):
    """
    Contract for objects that own an environment's action handling.

    A concrete strategy supplies the action space and turns a raw agent
    action into an outcome for the environment.
    """

    @abstractmethod
    def define_action_space(self) -> gym.spaces.Space:
        """
        Build the action space the environment exposes to the agent.

        Returns:
            gym.spaces.Space: The action space for the environment.
        """
        ...

    @abstractmethod
    def handle_action(self, env_self: TradingEnvProtocol, action: Any) -> Tuple[Any, Dict[str, Any]]:
        """
        Apply an agent action to the given environment.

        Args:
            env_self (TradingEnvProtocol): The environment instance the action is applied to.
            action (Any): The action chosen by the agent.

        Returns:
            Tuple[Any, Dict[str, Any]]: The outcome of the action taken in the environment.
        """
        ...

define_action_space() abstractmethod

Defines the action space for the environment.

Returns:

Type Description
Space

gym.spaces.Space: The action space for the environment.

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def define_action_space(self) -> gym.spaces.Space:
    """
    Build the action space the environment exposes to the agent.

    Returns:
        gym.spaces.Space: The action space for the environment.
    """
    ...

handle_action(env_self, action) abstractmethod

Handles the action taken by the agent in the environment.

Parameters:

Name Type Description Default
env_self TradingEnvProtocol

The environment instance where the action is taken.

required
action Any

The action taken by the agent.

required

Returns:

Type Description
Tuple[Any, Dict[str, Any]]

Tuple[Any, Dict[str, Any]]: The outcome of the action taken in the environment

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def handle_action(self, env_self: TradingEnvProtocol, action: Any) -> Tuple[Any, Dict[str, Any]]:
    """
    Apply an agent action to the given environment.

    Args:
        env_self (TradingEnvProtocol): The environment instance the action is applied to.
        action (Any): The action chosen by the agent.

    Returns:
        Tuple[Any, Dict[str, Any]]: The outcome of the action taken in the environment.
    """
    ...

BaseObservationStrategy

Bases: ABC

Abstract base class for defining how an agent perceives the environment.

Source code in src/quantrl_lab/environments/core/interfaces.py
class BaseObservationStrategy(ABC):
    """
    Contract for objects that decide what the agent observes.

    A concrete strategy declares the observation space, builds the
    observation for the current state, and names each observation feature.
    """

    @abstractmethod
    def define_observation_space(self, env: TradingEnvProtocol) -> gym.spaces.Space:
        """
        Declare the observation space for ``env``.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            gym.spaces.Space: The observation space.
        """
        ...

    @abstractmethod
    def build_observation(self, env: TradingEnvProtocol) -> np.ndarray:
        """
        Assemble the observation vector for the environment's current state.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            np.ndarray: The observation vector.
        """
        ...

    @abstractmethod
    def get_feature_names(self, env: TradingEnvProtocol) -> List[str]:
        """
        Name every element of the flattened observation vector, in order.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            List[str]: A list of feature names (e.g., ["Close_t-1", "RSI_t", ...])
        """
        ...

define_observation_space(env) abstractmethod

Defines and returns the observation space for the environment.

Parameters:

Name Type Description Default
env TradingEnvProtocol

The trading environment.

required

Returns:

Type Description
Space

gym.spaces.Space: The observation space.

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def define_observation_space(self, env: TradingEnvProtocol) -> gym.spaces.Space:
    """
    Declare the observation space for ``env``.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        gym.spaces.Space: The observation space.
    """
    ...

build_observation(env) abstractmethod

Builds the observation vector for the current state.

Parameters:

Name Type Description Default
env TradingEnvProtocol

The trading environment.

required

Returns:

Type Description
ndarray

np.ndarray: The observation vector.

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def build_observation(self, env: TradingEnvProtocol) -> np.ndarray:
    """
    Assemble the observation vector for the environment's current state.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        np.ndarray: The observation vector.
    """
    ...

get_feature_names(env) abstractmethod

Returns a list of feature names corresponding to the exact order of elements in the flattened observation vector.

Parameters:

Name Type Description Default
env TradingEnvProtocol

The trading environment.

required

Returns:

Type Description
List[str]

List[str]: A list of feature names (e.g., ["Close_t-1", "RSI_t", ...])

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def get_feature_names(self, env: TradingEnvProtocol) -> List[str]:
    """
    Name every element of the flattened observation vector, in order.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        List[str]: A list of feature names (e.g., ["Close_t-1", "RSI_t", ...])
    """
    ...

BaseRewardStrategy

Bases: ABC

Abstract base class for calculating rewards.

Source code in src/quantrl_lab/environments/core/interfaces.py
class BaseRewardStrategy(ABC):
    """
    Contract for objects that score an environment step.

    Subclasses must implement ``calculate_reward``; ``on_step_end`` is an
    optional hook with a no-op default.
    """

    @abstractmethod
    def calculate_reward(self, env: TradingEnvProtocol) -> float:
        """
        Compute the reward for the environment's current state.

        Args:
            env (TradingEnvProtocol): The trading environment instance.

        Returns:
            float: The calculated reward.

        Raises:
            NotImplementedError: Always, when this base implementation is invoked directly.
        """
        message = "Subclasses should implement this method."
        raise NotImplementedError(message)

    def on_step_end(self, env: TradingEnvProtocol):
        """Optional hook for updating internal state; does nothing by default."""
        return None

calculate_reward(env) abstractmethod

Calculate the reward based on the action taken in the environment.

Parameters:

Name Type Description Default
env TradingEnvProtocol

The trading environment instance.

required

Returns:

Name Type Description
float float

The calculated reward.

Source code in src/quantrl_lab/environments/core/interfaces.py
@abstractmethod
def calculate_reward(self, env: TradingEnvProtocol) -> float:
    """
    Compute the reward for the environment's current state.

    Args:
        env (TradingEnvProtocol): The trading environment instance.

    Returns:
        float: The calculated reward.

    Raises:
        NotImplementedError: Always, when this base implementation is invoked directly.
    """
    message = "Subclasses should implement this method."
    raise NotImplementedError(message)

on_step_end(env)

Optional: A hook to update any internal state if needed.

Source code in src/quantrl_lab/environments/core/interfaces.py
def on_step_end(self, env: TradingEnvProtocol):
    """Optional hook for updating internal state; does nothing by default."""
    return None

Types

types

Configuration

config

SimulationConfig

Bases: BaseModel

Configuration for market simulation parameters.

Source code in src/quantrl_lab/environments/stock/components/config.py
class SimulationConfig(BaseModel):
    """Settings that control how the market is simulated: fees, slippage,
    order lifetime and trading toggles."""

    # Validated to satisfy 0 <= value < 1.
    transaction_cost_pct: float = Field(
        default=0.001,
        ge=0,
        lt=1,
        description="The percentage fee for each transaction.",
    )
    # Validated to satisfy 0 <= value < 1.
    slippage: float = Field(
        default=0.001,
        ge=0,
        lt=1,
        description="The slippage percentage for market orders.",
    )
    # Must be strictly positive.
    order_expiration_steps: int = Field(
        default=5,
        gt=0,
        description="The number of steps before a pending order expires.",
    )
    enable_shorting: bool = Field(
        default=False,
        description="Whether to allow short selling.",
    )
    ignore_fees: bool = Field(
        default=False,
        description="Whether to ignore transaction costs.",
    )

RewardConfig

Bases: BaseModel

Configuration for reward calculation parameters.

Source code in src/quantrl_lab/environments/stock/components/config.py
class RewardConfig(BaseModel):
    """Settings that control how the final reward is post-processed."""

    # (lower, upper) bounds; defaults to (-1.0, 1.0).
    clip_range: Tuple[float, float] = Field(
        default=(-1.0, 1.0),
        description="Range to clip the final reward.",
    )

SingleStockEnvConfig

Bases: CoreEnvConfig

Stock environment configuration, extending the core environment configuration.

Source code in src/quantrl_lab/environments/stock/components/config.py
class SingleStockEnvConfig(CoreEnvConfig):
    """Stock environment configuration, extending the core environment
    configuration.

    Groups the scalar defaults for a single-stock environment together
    with the nested ``SimulationConfig`` and ``RewardConfig`` components.
    Each nested component is built through ``default_factory``, so it is
    constructed when the field is omitted rather than shared as one
    class-level instance.
    """

    # Core Defaults
    initial_balance: float = 100000.0
    window_size: int = 20
    price_column_index: int = 0

    # Components
    simulation: SimulationConfig = Field(default_factory=SimulationConfig)
    rewards: RewardConfig = Field(default_factory=RewardConfig)

    # NOTE(review): a nested ``class Config`` is the pydantic v1 spelling, while
    # ``from_attributes`` is the pydantic v2 option name - confirm which major
    # version is targeted and whether ``model_config`` should be used instead.
    class Config:
        from_attributes = True  # "ORM Mode"

SingleStockTradingEnv

single

SingleStockTradingEnv

Bases: Env

Source code in src/quantrl_lab/environments/stock/single.py
class SingleStockTradingEnv(gym.Env):
    """
    Gym-style environment for trading a single stock.

    The environment owns the market data, the step counters and a
    ``StockPortfolio``. Action decoding, reward calculation and observation
    building are delegated to the strategy objects supplied at construction.
    """

    # Added metadata for Gymnasium compatibility
    metadata = {"render_modes": ["human", "ansi"], "render_fps": 4}

    def __init__(
        self,
        data: Union[pd.DataFrame, np.ndarray],  # DataFrame or numpy array of market data + features
        config: SingleStockEnvConfig,  # Configuration object for environment settings
        action_strategy: BaseActionStrategy,  # Strategy for defining action space and handling actions,
        reward_strategy: BaseRewardStrategy,  # Strategy for calculating rewards
        observation_strategy: BaseObservationStrategy,
        price_column: Optional[Union[str, int]] = None,  # Column name or index for price (auto-detected if None)
    ):
        """
        Validate the inputs and wire the environment together.

        Args:
            data: Market data plus features, as a DataFrame or a 2D array of
                shape (num_steps, num_features). It is converted to float32.
            config: Environment settings (window size, balance, simulation and
                reward parameters).
            action_strategy: Defines the action space and handles actions.
            reward_strategy: Calculates the per-step reward.
            observation_strategy: Defines the observation space and builds
                observations.
            price_column: Price column name (DataFrame only) or integer index.
                When None, the column is auto-detected for DataFrames and
                taken from ``config.price_column_index`` for arrays.

        Raises:
            ValueError: If the price column cannot be resolved, the data is not
                2D, the data is not longer than ``config.window_size``, or the
                price column index is out of bounds.
        """
        super().__init__()

        # === Handle DataFrame input with auto-detection ===
        if isinstance(data, pd.DataFrame):
            self.original_columns = data.columns.tolist()
            # Auto-detect price column if not specified
            if price_column is None:
                self.price_column_index = auto_detect_price_column(data)
            elif isinstance(price_column, str):
                if price_column not in data.columns:
                    raise ValueError(
                        f"Price column '{price_column}' not found in DataFrame. Available columns: {list(data.columns)}"
                    )
                self.price_column_index = data.columns.get_loc(price_column)
            elif isinstance(price_column, (int, np.integer)):
                if not (0 <= price_column < len(data.columns)):
                    raise ValueError(
                        f"Price column index {price_column} out of bounds. DataFrame has {len(data.columns)} columns."
                    )
                # Normalise NumPy integers to a plain int so later indexing is uniform.
                self.price_column_index = int(price_column)
            else:
                raise ValueError("price_column must be a string (column name), integer (index), or None (auto-detect)")

            # Auto-detect OHLC columns for better execution simulation
            self.open_column_index = detect_column_index(data, ["Open", "open"])
            self.high_column_index = detect_column_index(data, ["High", "high"])
            self.low_column_index = detect_column_index(data, ["Low", "low"])

            # Convert DataFrame to numpy array
            data_array = data.values.astype(np.float32)
        else:
            # Handle numpy array input (existing behavior)
            self.original_columns = None
            self.open_column_index = None
            self.high_column_index = None
            self.low_column_index = None

            if price_column is None:
                if hasattr(config, "price_column_index") and config.price_column_index is not None:
                    self.price_column_index = config.price_column_index
                else:
                    raise ValueError("price_column must be provided when using numpy arrays")
            elif isinstance(price_column, (int, np.integer)):
                self.price_column_index = int(price_column)
            else:
                raise ValueError("price_column must be an integer index when using numpy arrays")

            data_array = data.astype(np.float32)

        # === Runtime error handling ===
        if data_array.ndim != 2:
            raise ValueError("Data must be a 2D array (num_steps, num_features).")
        if data_array.shape[0] <= config.window_size:
            raise ValueError("Data length must be greater than window_size.")
        if not (0 <= self.price_column_index < data_array.shape[1]):
            raise ValueError(f"price_column_index ({self.price_column_index}) is out of bounds.")

        # === Attributes ===
        self.Actions = Actions  # reference to the Actions class for easy access
        self.data = data_array  # Already converted to float32 above
        self.num_steps, self.num_features = self.data.shape
        self.window_size = config.window_size
        self._max_steps = self.num_steps - 1  # Max indexable step (data limit)

        # Set max episode steps - if None, use full data length
        self.max_episode_steps = config.max_episode_steps
        if self.max_episode_steps is None:
            self.max_episode_steps = self._max_steps - self.window_size + 1

        # Track episode steps separately from data steps
        self.episode_step = 0

        # Initialize the portfolio
        # NOTE(review): config.simulation.enable_shorting and ignore_fees are not
        # forwarded here - confirm whether StockPortfolio is meant to receive them.
        self.portfolio = StockPortfolio(
            initial_balance=config.initial_balance,
            transaction_cost_pct=config.simulation.transaction_cost_pct,
            slippage=config.simulation.slippage,
            order_expiration_steps=config.simulation.order_expiration_steps,
        )
        # TODO: consider other ways to handle expiration, e.g., GTC etc.

        # === Initialize the environment state variables ===
        # Done BEFORE the strategies are asked for their spaces: the observation
        # strategy receives `self`, so attributes such as `current_step` must
        # already exist at that point.
        self.reward_clip_range = config.rewards.clip_range
        self.prev_portfolio_value = 0.0
        self.action_type = None
        self.decoded_action_info = {}
        self.current_step = 0
        # Unclipped reward produced by the most recent step (see `_build_info_dict`).
        self._last_raw_reward = 0.0

        # === Define the strategies for action, reward, and observation ===
        self.action_strategy = action_strategy
        self.reward_strategy = reward_strategy
        self.observation_strategy = observation_strategy
        # === Delegate the action space and observation space definitions to the strategies ===
        # This allows for more modular and flexible design, where each strategy can define its own logic
        # for actions and observations without cluttering the environment class.
        self.action_space = self.action_strategy.define_action_space()
        self.observation_space = self.observation_strategy.define_observation_space(self)

        # === Example action space values:
        # Market Buy 50% of available balance
        # [1.0, 0.5, 1.0]  # Action type 1, 50% amount, price modifier ignored

        # Limit Sell 75% of shares at 5% above market price
        # [4.0, 0.75, 1.05]  # Action type 4, 75% amount, 5% above price

        # Stop Loss 100% of shares at 10% below market price
        # [5.0, 1.0, 0.9]  # Action type 5, 100% amount, 10% below price
        # ================================================================

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """
        Execute one time step within the environment.

        Args:
            action (np.ndarray): The action to execute.

        Raises:
            ValueError: If the action is not an array of the action-space
                shape, or if the environment is already at the last data step.
                Both checks run before any state is modified.

        Returns:
            Tuple[np.ndarray, float, bool, bool, Dict]: The observation, reward, terminated, truncated, and info.
        """
        # === Input validation ===
        if not isinstance(action, np.ndarray) or action.shape != self.action_space.shape:
            raise ValueError(
                f"Invalid action format received in step: {action}. Expected shape {self.action_space.shape}"
            )

        # Refuse to step past the data BEFORE touching the portfolio; otherwise
        # orders would be processed and the action applied on a call that fails.
        if self.current_step >= self._max_steps:
            raise ValueError("Cannot step beyond the maximum number of steps in the environment.")

        # === Step Execution ===

        # 1. Get the current price and store the portfolio value BEFORE any changes happen in this step
        # This is important for reward calculations, as we need to know the previous value
        # of the portfolio before applying the new action.
        current_price = self._get_current_price()
        self.prev_portfolio_value = self.portfolio.get_value(current_price)

        # 2. Process any open orders that might be pending from previous steps.
        # Extract OHLC data if available for realistic execution
        current_high, current_low, current_open = None, None, None

        if self.high_column_index is not None:
            current_high = float(self.data[self.current_step, self.high_column_index])
        if self.low_column_index is not None:
            current_low = float(self.data[self.current_step, self.low_column_index])
        if self.open_column_index is not None:
            current_open = float(self.data[self.current_step, self.open_column_index])

        self.portfolio.process_open_orders(
            self.current_step,
            current_price,
            current_high=current_high,
            current_low=current_low,
            current_open=current_open,
        )

        # 3. Handle the new action and STORE the results on `self`.
        # The reward strategies will access these via `env.action_type` and `env.decoded_action_info`.
        self.action_type, self.decoded_action_info = self.action_strategy.handle_action(self, action)

        # 4. Advance time and check termination/truncation conditions
        self.current_step += 1
        self.episode_step += 1

        # Determine termination and truncation
        # terminated: natural end of episode (reached end of data)
        # truncated: artificial time limit (max_episode_steps reached)
        terminated = self.current_step >= self._max_steps
        truncated = self.episode_step >= self.max_episode_steps

        # 5. Reward Calculation. This is delegated to the reward strategy.
        # We pass `self` so the strategy has full access to the environment's state.
        # The strategy is invoked exactly once per step; the raw value is kept so the
        # info dict can report it without calling the strategy a second time.
        raw_reward = self.reward_strategy.calculate_reward(self)
        self._last_raw_reward = raw_reward

        # 6. Clip the final, combined reward (good practice to keep this).
        reward = np.clip(raw_reward, *self.reward_clip_range).item()

        # 7. Call the 'on_step_end' hook for stateful strategies to update their internal memory.
        self.reward_strategy.on_step_end(self)

        # 8. Get next observation.
        observation = self.observation_strategy.build_observation(self)

        # 9. Build the info dictionary.
        # This contains useful information about the current state of the environment,
        # including portfolio value, balance, shares held, and the last executed order.
        info = self._build_info_dict()

        return observation, reward, terminated, truncated, info

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
        """
        Resets the environment to an initial state and returns the
        initial observation.

        Args:
            seed (Optional[int], optional): Random seed for reproducibility. Defaults to None.
            options (Optional[Dict], optional): Additional options for resetting the environment. Defaults to None.
                Currently unused.

        Returns:
            Tuple[np.ndarray, Dict]: Initial observation and info dictionary.
        """
        super().reset(seed=seed)

        # 1. Start at `window_size` so a full window of history precedes the first step.
        self.current_step = self.window_size

        # Reset episode step counter
        self.episode_step = 0

        # 2. Reset the portfolio to its initial state.
        self.portfolio.reset()

        # 3. Clear per-step state so nothing from the previous episode leaks into
        # the first observation or info dict. Values match those set in `__init__`.
        self.action_type = None
        self.decoded_action_info = {}
        self.prev_portfolio_value = 0.0
        self._last_raw_reward = 0.0

        initial_observation = self.observation_strategy.build_observation(self)
        info = {
            "initial_balance": self.portfolio.initial_balance,
            "starting_step": self.current_step,
            "message": "Environment reset.",
        }
        return initial_observation, info

    def render(self, mode="human"):
        """
        Renders the environment state.

        Args:
            mode: "human" prints to the console and returns None; "ansi"
                returns the state as a single string. Any other value is
                ignored and None is returned.
        """
        if mode == "ansi":
            return self._render_ansi()
        elif mode == "human":
            self._render_human()

    @staticmethod
    def _format_event_price(price_value: Any) -> str:
        """Format an order-event price with two decimals when it is numeric.

        NumPy scalar types are accepted as well as Python numbers, because
        ``np.float32`` is not an instance of ``float``. Anything else is
        passed through ``str``.
        """
        if isinstance(price_value, (int, float, np.integer, np.floating)):
            return f"{price_value:.2f}"
        return str(price_value)

    def _render_human(self):
        """Prints state information to the console."""
        current_price = self._get_current_price()
        portfolio_value = self.portfolio.get_value(current_price)
        total_shares = self.portfolio.total_shares

        print("-" * 40)
        print(f"Data Step:    {self.current_step}/{self._max_steps}")
        print(f"Episode Step: {self.episode_step}/{self.max_episode_steps}")
        print(f"Current Price:{current_price:>15.2f}")
        print(f"Balance:      {self.portfolio.balance:>15.2f}")
        print(f"Shares Held:  {self.portfolio.shares_held:>15} (Free)")
        print(f"Total Shares: {total_shares:>15} (Free + Reserved)")
        print(f"Portfolio Val:{portfolio_value:>15.2f}")
        print("-" * 40)
        print("Active Orders:")
        print(f" Pending Limit:{len(self.portfolio.pending_orders):>5}")
        print(f"  Stop Loss:    {len(self.portfolio.stop_loss_orders):>5}")
        print(f"  Take Profit:  {len(self.portfolio.take_profit_orders):>5}")

        if self.portfolio.executed_orders_history:
            last_event = self.portfolio.executed_orders_history[-1]
            print("-" * 40)
            price_str = self._format_event_price(last_event.get("price"))

            print(
                f"Last Event:   {last_event['type']} "
                f"(Shares: {last_event.get('shares', 'N/A')}, "
                f"Price: {price_str})"
            )

        print("-" * 40)

    def _render_ansi(self) -> str:
        """Returns state information as a string."""
        current_price = self._get_current_price()
        portfolio_value = self.portfolio.get_value(current_price)
        total_shares = self.portfolio.total_shares
        last_event_str = "None"

        if self.portfolio.executed_orders_history:
            last_event = self.portfolio.executed_orders_history[-1]
            price_str = self._format_event_price(last_event.get("price", "N/A"))
            last_event_str = f"{last_event['type']} (S:{last_event.get('shares', 'N/A')}, P:{price_str})"

        return (
            f"Data Step: {self.current_step}/{self._max_steps} | "
            f"Episode Step: {self.episode_step}/{self.max_episode_steps} | "
            f"Price: {current_price:.2f} | "
            f"Balance: {self.portfolio.balance:.2f} | "
            f"Shares(F/T): {self.portfolio.shares_held}/{total_shares} | "
            f"Value: {portfolio_value:.2f} | "
            f"Orders(P/SL/TP): {len(self.portfolio.pending_orders)}/"
            f"{len(self.portfolio.stop_loss_orders)}/"
            f"{len(self.portfolio.take_profit_orders)} | "
            f"Last Event: {last_event_str}"
        )

    def close(self):
        """Announce on stdout that the environment has been closed."""
        print("SingleStockTradingEnv closed.")

    def _get_current_price(self) -> float:
        """
        Get the current price from the data array based on the current
        step.

        An out-of-range step is clamped to the nearest valid row, so a step
        past the end yields the last known price and a negative step yields
        the first row instead of wrapping around to the end of the array.

        Returns:
            float: The price at the (clamped) current step.

        Raises:
            ValueError: If the data array has no rows.
        """
        if self.num_steps <= 0:
            raise ValueError(
                f"No valid price data available at step {self.current_step} (data length: {self.num_steps})"
            )
        row = min(max(self.current_step, 0), self.num_steps - 1)
        return float(self.data[row, self.price_column_index])

    def _build_info_dict(self) -> Dict[str, Any]:
        """
        Builds an information dictionary for the current environment
        state.

        The "reward" entry is the unclipped value the reward strategy returned
        during the most recent step (0.0 before the first step of an episode).
        It is read from stored state rather than recomputed, so the strategy
        is not invoked a second time after its ``on_step_end`` hook has run.

        Returns:
            Dict[str, Any]: A dictionary containing relevant information about the environment state.
        """
        current_price = self._get_current_price()
        history = self.portfolio.executed_orders_history
        return {
            "step": self.current_step,
            "episode_step": self.episode_step,
            "max_episode_steps": self.max_episode_steps,
            "portfolio_value": self.portfolio.get_value(current_price),
            "balance": self.portfolio.balance,
            "shares_held": self.portfolio.shares_held,
            "total_shares": self.portfolio.total_shares,
            "current_price": current_price,
            "reward": self._last_raw_reward,
            "action_decoded": self.decoded_action_info,
            "orders_info": {
                "pending_count": len(self.portfolio.pending_orders),
                "stop_loss_count": len(self.portfolio.stop_loss_orders),
                "take_profit_count": len(self.portfolio.take_profit_orders),
            },
            "last_order_event": history[-1] if history else None,
        }

step(action)

Execute one time step within the environment.

Parameters:

Name Type Description Default
action ndarray

The action to execute.

required

Raises:

Type Description
ValueError

If the action is not valid.

Returns:

Type Description
Tuple[ndarray, float, bool, bool, Dict]

Tuple[np.ndarray, float, bool, bool, Dict]: The observation, reward, terminated, truncated, and info.

Source code in src/quantrl_lab/environments/stock/single.py
def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]:
    """
    Execute one time step within the environment.

    Args:
        action (np.ndarray): The action to execute.

    Raises:
        ValueError: If the action is not an array of the action-space shape,
            or if the environment is already at the last data step. Both
            checks run before any state is modified.

    Returns:
        Tuple[np.ndarray, float, bool, bool, Dict]: The observation, reward, terminated, truncated, and info.
    """
    # === Input validation ===
    if not isinstance(action, np.ndarray) or action.shape != self.action_space.shape:
        raise ValueError(
            f"Invalid action format received in step: {action}. Expected shape {self.action_space.shape}"
        )

    # Refuse to step past the data BEFORE touching the portfolio; otherwise
    # orders would be processed and the action applied on a call that fails.
    if self.current_step >= self._max_steps:
        raise ValueError("Cannot step beyond the maximum number of steps in the environment.")

    # === Step Execution ===

    # 1. Get the current price and store the portfolio value BEFORE any changes happen in this step
    # This is important for reward calculations, as we need to know the previous value
    # of the portfolio before applying the new action.
    current_price = self._get_current_price()
    self.prev_portfolio_value = self.portfolio.get_value(current_price)

    # 2. Process any open orders that might be pending from previous steps.
    # Extract OHLC data if available for realistic execution
    current_high, current_low, current_open = None, None, None

    if self.high_column_index is not None:
        current_high = float(self.data[self.current_step, self.high_column_index])
    if self.low_column_index is not None:
        current_low = float(self.data[self.current_step, self.low_column_index])
    if self.open_column_index is not None:
        current_open = float(self.data[self.current_step, self.open_column_index])

    self.portfolio.process_open_orders(
        self.current_step,
        current_price,
        current_high=current_high,
        current_low=current_low,
        current_open=current_open,
    )

    # 3. Handle the new action and STORE the results on `self`.
    # The reward strategies will access these via `env.action_type` and `env.decoded_action_info`.
    self.action_type, self.decoded_action_info = self.action_strategy.handle_action(self, action)

    # 4. Advance time and check termination/truncation conditions
    self.current_step += 1
    self.episode_step += 1

    # Determine termination and truncation
    # terminated: natural end of episode (reached end of data)
    # truncated: artificial time limit (max_episode_steps reached)
    terminated = self.current_step >= self._max_steps
    truncated = self.episode_step >= self.max_episode_steps

    # 5. Reward Calculation. This is delegated to the reward strategy.
    # We pass `self` so the strategy has full access to the environment's state.
    reward = self.reward_strategy.calculate_reward(self)

    # 6. Clip the final, combined reward (good practice to keep this).
    reward = np.clip(reward, *self.reward_clip_range).item()

    # 7. Call the 'on_step_end' hook for stateful strategies to update their internal memory.
    self.reward_strategy.on_step_end(self)

    # 8. Get next observation.
    observation = self.observation_strategy.build_observation(self)

    # 9. Build the info dictionary.
    # This contains useful information about the current state of the environment,
    # including portfolio value, balance, shares held, and the last executed order.
    info = self._build_info_dict()

    return observation, reward, terminated, truncated, info

reset(*, seed=None, options=None)

Resets the environment to an initial state and returns the initial observation.

Parameters:

Name Type Description Default
seed Optional[int]

Random seed for reproducibility. Defaults to None.

None
options Optional[Dict]

Additional options for resetting the environment. Defaults to None.

None

Returns:

Type Description
Tuple[ndarray, Dict]

Tuple[np.ndarray, Dict]: Initial observation and info dictionary.

Source code in src/quantrl_lab/environments/stock/single.py
def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
    """
    Start a new episode and return its first observation.

    Args:
        seed (Optional[int], optional): Seed forwarded to the parent class' ``reset``. Defaults to None.
        options (Optional[Dict], optional): Accepted for interface compatibility; this
            implementation does not read it. Defaults to None.

    Returns:
        Tuple[np.ndarray, Dict]: The initial observation and an info dictionary holding the
            portfolio's initial balance, the starting step and a status message.
    """
    super().reset(seed=seed)

    # Rewind both counters. The data cursor starts at `window_size` rather than 0 so the
    # first observation has a full look-back window behind it; the per-episode step
    # counter starts from zero.
    self.current_step, self.episode_step = self.window_size, 0

    # Put the portfolio back into its starting state before anything is observed.
    self.portfolio.reset()

    # Build the first observation from the freshly reset state, then describe the reset.
    first_observation = self.observation_strategy.build_observation(self)
    return first_observation, {
        "initial_balance": self.portfolio.initial_balance,
        "starting_step": self.current_step,
        "message": "Environment reset.",
    }

render(mode='human')

Renders the environment state.

Source code in src/quantrl_lab/environments/stock/single.py
def render(self, mode="human"):
    """
    Render the environment state.

    ``"ansi"`` returns whatever ``_render_ansi`` produces; ``"human"`` calls
    ``_render_human`` and returns nothing. Any other mode is ignored.
    """
    if mode == "human":
        self._render_human()
        return None
    if mode == "ansi":
        return self._render_ansi()
    return None

Portfolio

portfolio

StockPortfolio

Bases: Portfolio

A portfolio for stock trading that handles complex order types, fees, and slippage.

It extends the simple Portfolio with stock-specific logic and state.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
class StockPortfolio(Portfolio):
    """
    A portfolio for stock trading that handles complex order types,
    fees, and slippage.

    It extends the simple Portfolio with stock-specific logic and state.
    """

    def __init__(
        self,
        initial_balance: float,
        transaction_cost_pct: float,
        slippage: float,
        order_expiration_steps: int,
    ):
        """
        Create a stock portfolio with empty order books.

        Args:
            initial_balance (float): Starting cash, forwarded to the parent ``Portfolio``.
            transaction_cost_pct (float): Proportional fee applied to every fill.
            slippage (float): Proportional price adjustment applied to market-style fills.
            order_expiration_steps (int): Number of steps after which a TTL order expires.
        """
        # The parent class only needs the starting balance.
        super().__init__(initial_balance=initial_balance)

        # Trading frictions and order lifetime.
        self.order_expiration_steps = order_expiration_steps
        self.slippage = slippage
        self.transaction_cost_pct = transaction_cost_pct

        # Open order books, one per order family, holding typed Order objects.
        self.take_profit_orders: List[Order] = []
        self.stop_loss_orders: List[Order] = []
        self.pending_orders: List[Order] = []

        # Event log kept as plain dicts so entries can carry different fields per event type.
        self.executed_orders_history: List[Dict[str, Any]] = []

    def reset(self) -> None:
        """Restore the parent state and empty every order book and the event log."""
        super().reset()
        # Four distinct fresh lists: three order books plus the event history.
        (
            self.pending_orders,
            self.stop_loss_orders,
            self.take_profit_orders,
            self.executed_orders_history,
        ) = ([], [], [], [])

    @property
    def shares_held(self) -> int:
        """
        Stock-flavoured alias for ``units_held``.

        Returns:
            int: The current value of ``units_held`` (shares moved into open sell-side
                orders have already been subtracted from it).
        """
        free_units = self.units_held
        return free_units

    @property
    def total_shares(self) -> int:
        """
        Count every share owned: free units plus units reserved in open orders.

        Returns:
            int: ``units_held`` plus the result of ``_get_reserved_shares()``.
        """
        free_units = self.units_held
        locked_units = self._get_reserved_shares()
        return free_units + locked_units

    def get_value(self, current_price: float) -> float:
        """
        Mark the whole portfolio to market, open orders included.

        Args:
            current_price (float): Price used to value every share.

        Returns:
            float: Free cash and free shares, plus cash reserved by pending limit buys,
                plus the value of shares reserved by limit sells, stop losses and
                take profits.
        """
        # Start from what is freely available.
        worth = self.balance + (self.units_held * current_price)

        # First pass over the pending book: cash locked up by limit buys.
        for queued in self.pending_orders:
            if queued.type == OrderType.LIMIT_BUY:
                worth += queued.cost_reserved

        # Second pass: shares locked up by limit sells, valued at the current price.
        for queued in self.pending_orders:
            if queued.type == OrderType.LIMIT_SELL:
                worth += queued.shares * current_price

        # Shares parked in protective orders: stop losses first, then take profits.
        for protective in [*self.stop_loss_orders, *self.take_profit_orders]:
            worth += protective.shares * current_price

        return worth

    def process_open_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: Optional[float] = None,
        current_low: Optional[float] = None,
        current_open: Optional[float] = None,
    ) -> None:
        """
        Run one bar of order processing: pending limit orders first, then
        stop-loss / take-profit orders.

        Args:
            current_step (int): The current step in the trading environment.
            current_price (float): The current close price.
            current_high (Optional[float]): High of the bar; falls back to current_price when None.
            current_low (Optional[float]): Low of the bar; falls back to current_price when None.
            current_open (Optional[float]): Open of the bar; falls back to current_price when None.
        """
        # Close-only callers omit the OHLC extras; substitute the close for each missing one.
        bar_high = current_price if current_high is None else current_high
        bar_low = current_price if current_low is None else current_low
        bar_open = current_price if current_open is None else current_open

        bar = (current_step, current_price, bar_high, bar_low, bar_open)
        self._process_pending_orders(*bar)
        self._process_risk_management_orders(*bar)

    def execute_market_order(
        self, action_type: Actions, current_price: float, amount_pct: float, current_step: int
    ) -> None:
        """
        Fill a market buy or sell immediately at the slippage-adjusted price.

        Args:
            action_type (Actions): ``Actions.Buy`` or ``Actions.Sell``.
            current_price (float): The current market price.
            amount_pct (float): Fraction of the free balance (buy) or of the held shares
                (sell) to trade; clipped to [0, 1].
            current_step (int): The current step, recorded in the event log.

        Returns:
            None

        Raises:
            ValueError: If ``action_type`` is neither Buy nor Sell.
        """
        fraction = max(0.0, min(1.0, amount_pct))

        wants_buy = action_type == Actions.Buy
        if wants_buy and self.balance <= 0:
            return  # Nothing to spend
        if not (wants_buy or action_type == Actions.Sell):
            raise ValueError("Invalid action type for market order")

        if wants_buy:
            # Buyer pays slippage on top of the price, then the fee on top of that.
            fill_price = current_price * (1 + self.slippage)
            unit_cost = fill_price * (1 + self.transaction_cost_pct)
            if unit_cost <= 1e-9:
                return  # Guards the division below

            quantity = int((self.balance / unit_cost) * fraction)
            if quantity <= 0:
                return

            total_cost = quantity * unit_cost
            if total_cost <= self.balance:
                self.balance -= total_cost
                self.units_held += quantity
                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "market_buy",
                        "shares": quantity,
                        "price": fill_price,
                        "cost": total_cost,
                    }
                )
            return

        # From here on the action is a market sell.
        if self.units_held <= 0:
            return
        quantity = int(self.units_held * fraction)
        if quantity <= 0:
            return

        # Seller receives the price minus slippage, then pays the fee out of the proceeds.
        fill_price = current_price * (1 - self.slippage)
        proceeds = quantity * fill_price * (1 - self.transaction_cost_pct)
        self.units_held -= quantity
        self.balance += proceeds
        self.executed_orders_history.append(
            {
                "step": current_step,
                "type": "market_sell",
                "shares": quantity,
                "price": fill_price,
                "revenue": proceeds,
            }
        )

    def place_limit_order(
        self,
        action_type: Actions,
        current_price: float,
        amount_pct: float,
        price_modifier: float,
        current_step: int,
        tif: OrderTIF = OrderTIF.TTL,  # Default to TTL to preserve previous behavior
    ) -> None:
        """
        Place a limit order for buying or selling an asset.

        The limit price is ``current_price * price_modifier``. IOC orders either fill
        immediately at the limit price or are dropped; every other time-in-force rests
        in ``pending_orders`` with the required cash (buy) or shares (sell) reserved.
        Action types other than LimitBuy / LimitSell are ignored.

        Args:
            action_type (Actions): The type of action (LimitBuy/LimitSell).
            current_price (float): The current market price.
            amount_pct (float): Fraction of the free balance (buy) or of the held shares
                (sell) to commit; clipped to [0, 1].
            price_modifier (float): Multiplier applied to the current price to get the limit price.
            current_step (int): The current step in the trading environment.
            tif (OrderTIF): Time in Force for the order.

        Returns:
            None
        """
        # Clip to [0, 1] exactly as execute_market_order does. Without this, a fraction
        # above 1 on the sell side would take more shares than are held and leave
        # units_held negative.
        amount_pct = max(0.0, min(1.0, amount_pct))

        limit_price = current_price * price_modifier

        # === Limit Buy Logic ===
        if action_type == Actions.LimitBuy:
            cost_per_share = limit_price * (1 + self.transaction_cost_pct)
            if cost_per_share <= 1e-9:
                return  # Avoid division by zero
            shares_to_buy = int((self.balance / cost_per_share) * amount_pct)
            if shares_to_buy > 0:
                cost_reserved = shares_to_buy * cost_per_share

                # Check balance
                if cost_reserved > self.balance:
                    return

                # --- Handle IOC (Immediate or Cancel) ---
                if tif == OrderTIF.IOC:
                    # If current price <= limit price, execute immediately
                    if current_price <= limit_price:
                        # IOC fills are booked at the limit price, like resting limit fills
                        execution_price = limit_price

                        self.balance -= cost_reserved
                        # Add shares (execution success)
                        self.units_held += shares_to_buy

                        self.executed_orders_history.append(
                            {
                                "step": current_step,
                                "type": "limit_buy_executed_ioc",
                                "shares": shares_to_buy,
                                "price": execution_price,
                                "cost": cost_reserved,
                            }
                        )
                    # If not executable, do nothing (cancel)
                    return

                # --- Handle GTC / TTL (Pending) ---
                # Reserve the cash now; it is refunded on expiry or (partly) on a better fill.
                self.balance -= cost_reserved
                order = Order(
                    type=OrderType.LIMIT_BUY,
                    shares=shares_to_buy,
                    price=limit_price,
                    placed_at=current_step,
                    reference_price=current_price,
                    cost_reserved=cost_reserved,
                    tif=tif,
                )
                self.pending_orders.append(order)

                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "limit_buy_placed",
                        "shares": shares_to_buy,
                        "price": limit_price,
                        "tif": tif.value,
                    }
                )

        # === Limit Sell Logic ===
        elif action_type == Actions.LimitSell:
            if self.units_held <= 0:
                return
            shares_to_sell = int(self.units_held * amount_pct)
            if shares_to_sell > 0:

                # --- Handle IOC (Immediate or Cancel) ---
                if tif == OrderTIF.IOC:
                    # If current price >= limit price, execute immediately
                    if current_price >= limit_price:
                        execution_price = limit_price

                        # Calculate revenue
                        revenue = shares_to_sell * execution_price * (1 - self.transaction_cost_pct)

                        self.units_held -= shares_to_sell
                        self.balance += revenue

                        self.executed_orders_history.append(
                            {
                                "step": current_step,
                                "type": "limit_sell_executed_ioc",
                                "shares": shares_to_sell,
                                "price": execution_price,
                                "revenue": revenue,
                            }
                        )
                    # If not executable, do nothing (cancel)
                    return

                # --- Handle GTC / TTL (Pending) ---
                # Reserve the shares now; they return to units_held if the order expires.
                self.units_held -= shares_to_sell
                order = Order(
                    type=OrderType.LIMIT_SELL,
                    shares=shares_to_sell,
                    price=limit_price,
                    placed_at=current_step,
                    reference_price=current_price,
                    tif=tif,
                )
                self.pending_orders.append(order)

                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "limit_sell_placed",
                        "shares": shares_to_sell,
                        "price": limit_price,
                        "tif": tif.value,
                    }
                )

    def place_risk_management_order(
        self,
        action_type: Actions,
        current_price: float,
        amount_pct: float,
        price_modifier: float,
        current_step: int,
        tif: OrderTIF = OrderTIF.GTC,  # Default to GTC (standard for stop loss)
    ) -> None:
        """
        Place a risk management order (stop loss or take profit).

        The covered shares are removed from ``units_held`` and reserved in the order
        until it triggers or expires. Action types other than StopLoss / TakeProfit
        are ignored.

        Args:
            action_type (Actions): The type of action (StopLoss/TakeProfit).
            current_price (float): The current market price.
            amount_pct (float): Fraction of the held shares to cover; clipped to [0, 1].
            price_modifier (float): Multiplier applied to the current price. It is capped at
                0.999 for stop losses and floored at 1.001 for take profits.
            current_step (int): The current step in the trading environment.
            tif (OrderTIF): Time in Force. Only GTC and TTL are valid for Stop orders.

        Returns:
            None
        """
        # Validate TIF for Stop orders
        if tif == OrderTIF.IOC:
            return  # IOC is invalid for Stop orders (must rest until trigger)

        if self.units_held <= 0:
            return

        # Clip to [0, 1] exactly as execute_market_order does. Without this, a fraction
        # above 1 would cover more shares than are held and leave units_held negative.
        amount_pct = max(0.0, min(1.0, amount_pct))

        shares_to_cover = int(self.units_held * amount_pct)
        if shares_to_cover > 0:
            # === Stop Loss Logic ===
            if action_type == Actions.StopLoss:
                # The stop is kept strictly below the current price.
                stop_price = current_price * min(0.999, price_modifier)
                if stop_price >= current_price:
                    stop_price = current_price * 0.999

                self.units_held -= shares_to_cover

                order = Order(
                    type=OrderType.STOP_LOSS, shares=shares_to_cover, price=stop_price, placed_at=current_step, tif=tif
                )
                self.stop_loss_orders.append(order)

                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "stop_loss_placed",
                        "shares": shares_to_cover,
                        "price": stop_price,
                        "tif": tif.value,
                    }
                )
            # === Take Profit Logic ===
            elif action_type == Actions.TakeProfit:
                # The target is kept strictly above the current price.
                take_profit_price = current_price * max(1.001, price_modifier)
                if take_profit_price <= current_price:
                    take_profit_price = current_price * 1.001

                self.units_held -= shares_to_cover

                order = Order(
                    type=OrderType.TAKE_PROFIT,
                    shares=shares_to_cover,
                    price=take_profit_price,
                    placed_at=current_step,
                    tif=tif,
                )
                self.take_profit_orders.append(order)

                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "take_profit_placed",
                        "shares": shares_to_cover,
                        "price": take_profit_price,
                        "tif": tif.value,
                    }
                )

    # === Private Helper Methods ===
    def _get_reserved_shares(self) -> int:
        """
        Add up the shares currently locked inside open sell-side orders.

        Returns:
            int: Shares held by stop-loss orders, take-profit orders and pending
                limit-sell orders combined.
        """

        def locked(orders) -> int:
            return sum(entry.shares for entry in orders)

        # Pending limit buys reserve cash, not shares, so only limit sells count here.
        resting_sells = [entry for entry in self.pending_orders if entry.type == OrderType.LIMIT_SELL]
        return locked(self.stop_loss_orders) + locked(self.take_profit_orders) + locked(resting_sells)

    def _process_pending_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: float,
        current_low: float,
        current_open: float,
    ) -> None:
        """
        Expire or fill resting limit orders against the current bar.

        For each pending order, in book order: a TTL order older than
        ``order_expiration_steps`` is cancelled and its reservation returned; otherwise a
        limit buy fills when the bar's low touches its price and a limit sell fills when
        the bar's high touches its price. Orders that do neither stay pending. All
        resulting events are appended to ``executed_orders_history``.
        """
        still_open: List[Order] = []
        events = []

        for order in self.pending_orders:
            is_buy = order.type == OrderType.LIMIT_BUY
            is_sell = order.type == OrderType.LIMIT_SELL

            # Expiry is only evaluated for TTL orders and takes priority over a fill.
            timed_out = order.tif == OrderTIF.TTL and (
                current_step - order.placed_at > self.order_expiration_steps
            )
            if timed_out:
                # Hand the reservation back: cash for buys, shares for sells.
                if is_buy:
                    self.balance += order.cost_reserved
                elif is_sell:
                    self.units_held += order.shares

                events.append(
                    {
                        "step": current_step,
                        "type": f"{order.type.value}_expired",
                        "shares": order.shares,
                        "price": order.price,
                        "reason": "Expired",
                    }
                )
                continue

            if is_buy and current_low <= order.price:
                # Gap handling: an open below the limit fills at the open (better price),
                # otherwise the fill happens at the limit itself.
                fill_price = current_open if current_open < order.price else order.price

                # The reservation was sized at the limit price; refund whatever a
                # cheaper fill left over.
                spent = order.shares * fill_price * (1 + self.transaction_cost_pct)
                refund = order.cost_reserved - spent
                if refund > 0:
                    self.balance += refund

                self.units_held += order.shares
                events.append(
                    {
                        "step": current_step,
                        "type": "limit_buy_executed",
                        "shares": order.shares,
                        "price": fill_price,
                        "reference_price": order.reference_price,
                        "cost": spent,
                    }
                )
                continue

            if is_sell and current_high >= order.price:
                # Gap handling: an open above the limit fills at the open (better price).
                fill_price = current_open if current_open > order.price else order.price

                proceeds = order.shares * fill_price * (1 - self.transaction_cost_pct)
                self.balance += proceeds
                events.append(
                    {
                        "step": current_step,
                        "type": "limit_sell_executed",
                        "shares": order.shares,
                        "price": fill_price,
                        "reference_price": order.reference_price,
                        "revenue": proceeds,
                    }
                )
                continue

            # Neither expired nor triggered: keep it on the book.
            still_open.append(order)

        # Swap in the surviving orders and record what happened on this bar.
        self.pending_orders = still_open
        if events:
            self.executed_orders_history.extend(events)

    def _process_risk_management_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: float,
        current_low: float,
        current_open: float,
    ) -> None:
        """
        Process stop-loss and take-profit orders against the current bar.

        Stop losses are handled first, then take profits. A TTL order older than
        ``order_expiration_steps`` is cancelled and its shares returned to ``units_held``.
        A stop loss triggers when the bar's low reaches its price; a take profit triggers
        when the bar's high reaches its price. Triggered orders are sold with slippage and
        the transaction fee applied. Every expiry and execution is appended to
        ``executed_orders_history``.

        Args:
            current_step (int): The current step in the trading environment.
            current_price (float): The current close price (not used by the trigger logic).
            current_high (float): High price of the bar.
            current_low (float): Low price of the bar.
            current_open (float): Open price of the bar, used for gap fills.
        """
        executed_order_details = []

        # === Process Stop Loss Orders ===
        remaining_stop_loss: List[Order] = []
        for order in self.stop_loss_orders:
            # Check Expiration for TTL
            expired = False
            if order.tif == OrderTIF.TTL:
                expired = current_step - order.placed_at > self.order_expiration_steps

            if expired:
                # Release the reserved shares back to the free position.
                self.units_held += order.shares
                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "stop_loss_expired",
                        "shares": order.shares,
                        "price": order.price,
                    }
                )
                continue

            # Check Trigger: Low <= Stop Price
            if current_low <= order.price:
                # Determine execution price (Gap Handling)
                # If Open < Stop Price (gap down), we fill at Open (worse price).
                # Otherwise we fill at Stop Price.
                trigger_price = order.price
                fill_price = trigger_price
                if current_open < trigger_price:
                    fill_price = current_open

                # Apply slippage to the fill price
                adjusted_price = fill_price * (1 - self.slippage)
                revenue = order.shares * adjusted_price * (1 - self.transaction_cost_pct)
                self.balance += revenue

                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "stop_loss_executed",
                        "shares": order.shares,
                        "trigger_price": trigger_price,
                        "execution_price": adjusted_price,
                        "revenue": revenue,
                    }
                )
            else:
                remaining_stop_loss.append(order)
        self.stop_loss_orders = remaining_stop_loss

        # === Process Take Profit Orders ===
        remaining_take_profit: List[Order] = []
        for order in self.take_profit_orders:
            # Check Expiration
            expired = False
            if order.tif == OrderTIF.TTL:
                expired = current_step - order.placed_at > self.order_expiration_steps

            if expired:
                # Release the reserved shares back to the free position.
                self.units_held += order.shares
                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "take_profit_expired",
                        "shares": order.shares,
                        "price": order.price,
                    }
                )
                continue

            # Check Trigger: High >= Take Profit Price
            if current_high >= order.price:
                # Determine execution price (Gap Handling)
                # If Open > TP Price (gap up), we fill at Open (better price).
                trigger_price = order.price
                fill_price = trigger_price
                if current_open > trigger_price:
                    fill_price = current_open

                # Apply slippage
                adjusted_price = fill_price * (1 - self.slippage)
                revenue = order.shares * adjusted_price * (1 - self.transaction_cost_pct)
                self.balance += revenue

                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "take_profit_executed",
                        "shares": order.shares,
                        "trigger_price": trigger_price,
                        "execution_price": adjusted_price,
                        "revenue": revenue,
                    }
                )
            else:
                remaining_take_profit.append(order)
        self.take_profit_orders = remaining_take_profit

        # Record the collected events. They were previously built and then discarded, so
        # stop-loss / take-profit fills and expirations never reached the history, unlike
        # the limit-order events logged by _process_pending_orders.
        if executed_order_details:
            self.executed_orders_history.extend(executed_order_details)

shares_held property

Returns the number of shares currently held in the portfolio.

Returns:

Name Type Description
int int

The number of shares held.

total_shares property

Returns the total number of shares held, including those reserved in orders.

Returns:

Name Type Description
int int

The total number of shares held.

reset()

Reset the portfolio to its initial state.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def reset(self) -> None:
    """Restore the parent state and empty every order book and the event log."""
    super().reset()
    # Four distinct fresh lists: three order books plus the event history.
    (
        self.pending_orders,
        self.stop_loss_orders,
        self.take_profit_orders,
        self.executed_orders_history,
    ) = ([], [], [], [])

get_value(current_price)

Calculate the total value of the portfolio including unfilled orders and reserved money.

Parameters:

Name Type Description Default
current_price float

The current market price of the asset.

required

Returns:

Name Type Description
float float

The total portfolio value including all positions and reserved amounts.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def get_value(self, current_price: float) -> float:
    """
    Mark the whole portfolio to market, open orders included.

    Args:
        current_price (float): Price used to value every share.

    Returns:
        float: Free cash and free shares, plus cash reserved by pending limit buys,
            plus the value of shares reserved by limit sells, stop losses and
            take profits.
    """
    # Start from what is freely available.
    worth = self.balance + (self.units_held * current_price)

    # First pass over the pending book: cash locked up by limit buys.
    for queued in self.pending_orders:
        if queued.type == OrderType.LIMIT_BUY:
            worth += queued.cost_reserved

    # Second pass: shares locked up by limit sells, valued at the current price.
    for queued in self.pending_orders:
        if queued.type == OrderType.LIMIT_SELL:
            worth += queued.shares * current_price

    # Shares parked in protective orders: stop losses first, then take profits.
    for protective in [*self.stop_loss_orders, *self.take_profit_orders]:
        worth += protective.shares * current_price

    return worth

process_open_orders(current_step, current_price, current_high=None, current_low=None, current_open=None)

Process all open orders using OHLC data for realistic execution.

Parameters:

Name Type Description Default
current_step int

The current step in the trading environment.

required
current_price float

The current close price.

required
current_high Optional[float]

High price of the bar. Defaults to current_price.

None
current_low Optional[float]

Low price of the bar. Defaults to current_price.

None
current_open Optional[float]

Open price of the bar. Defaults to current_price.

None
Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def process_open_orders(
    self,
    current_step: int,
    current_price: float,
    current_high: Optional[float] = None,
    current_low: Optional[float] = None,
    current_open: Optional[float] = None,
) -> None:
    """
    Run one bar of order processing over every open order.

    Pending orders are processed first, then the risk management orders;
    both helpers receive the same step and OHLC values.

    Args:
        current_step (int): The current step in the trading environment.
        current_price (float): The current close price.
        current_high (Optional[float]): High price of the bar. Defaults to current_price.
        current_low (Optional[float]): Low price of the bar. Defaults to current_price.
        current_open (Optional[float]): Open price of the bar. Defaults to current_price.
    """
    # Any bar field the caller left out collapses to the close, which keeps
    # close-only callers working (backward compatibility)
    bar_high = current_price if current_high is None else current_high
    bar_low = current_price if current_low is None else current_low
    bar_open = current_price if current_open is None else current_open

    bar_args = (current_step, current_price, bar_high, bar_low, bar_open)
    self._process_pending_orders(*bar_args)
    self._process_risk_management_orders(*bar_args)

execute_market_order(action_type, current_price, amount_pct, current_step)

Execute a market order.

Parameters:

Name Type Description Default
action_type Actions

The type of action (buy/sell).

required
current_price float

The current market price.

required
amount_pct float

The fraction of the free balance (buy) or of the held shares (sell) to trade, clipped to the range 0.0–1.0.

required
current_step int

The current step in the trading environment.

required

Returns:

Type Description
None

None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def execute_market_order(
    self, action_type: Actions, current_price: float, amount_pct: float, current_step: int
) -> None:
    """
    Execute a market order immediately at the slippage-adjusted price.

    A buy spends up to ``amount_pct`` of the free balance on whole shares,
    paying slippage and the transaction cost on top of ``current_price``.
    A sell disposes of ``amount_pct`` of the free shares (rounded down to
    whole shares), receiving the price reduced by slippage and the
    transaction cost. Every fill is appended to
    ``self.executed_orders_history``.

    Args:
        action_type (Actions): The type of action (buy/sell).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the free balance (buy) or of the held
            shares (sell) to trade. Clipped to [0, 1]; a NaN value makes the
            call a no-op.
        current_step (int): The current step in the trading environment.

    Returns:
        None

    Raises:
        ValueError: If ``action_type`` is neither ``Actions.Buy`` nor
            ``Actions.Sell`` (not checked when ``amount_pct`` is NaN, because
            that case returns first).
    """
    # NaN is the only value that compares unequal to itself. Without this
    # guard the min()/max() clip below would turn a NaN size into 1.0 and
    # silently trade the whole balance or position.
    if amount_pct != amount_pct:
        return

    # Clip amount_pct to valid range
    amount_pct = max(0.0, min(1.0, amount_pct))

    # === Runtime error checks ===
    if self.balance <= 0 and action_type == Actions.Buy:
        return  # Insufficient balance to execute buy order
    if action_type not in [Actions.Buy, Actions.Sell]:
        raise ValueError("Invalid action type for market order")

    # === Buy Logic ===
    if action_type == Actions.Buy:
        # Buyer pays above the quote (slippage) plus the proportional fee
        adjusted_price = current_price * (1 + self.slippage)
        cost_per_share = adjusted_price * (1 + self.transaction_cost_pct)
        if cost_per_share <= 1e-9:
            return  # Avoid division by zero

        # Whole shares only: int() truncates the affordable quantity
        shares_to_buy = int((self.balance / cost_per_share) * amount_pct)
        if shares_to_buy > 0:
            actual_cost = shares_to_buy * cost_per_share
            if actual_cost <= self.balance:
                self.balance -= actual_cost
                self.units_held += shares_to_buy
                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "market_buy",
                        "shares": shares_to_buy,
                        "price": adjusted_price,
                        "cost": actual_cost,
                    }
                )

    # === Sell Logic ===
    elif action_type == Actions.Sell:
        if self.units_held <= 0:
            return
        shares_to_sell = int(self.units_held * amount_pct)
        if shares_to_sell > 0:
            # Seller receives below the quote (slippage) minus the proportional fee
            adjusted_price = current_price * (1 - self.slippage)
            revenue = shares_to_sell * adjusted_price * (1 - self.transaction_cost_pct)
            self.units_held -= shares_to_sell
            self.balance += revenue
            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "market_sell",
                    "shares": shares_to_sell,
                    "price": adjusted_price,
                    "revenue": revenue,
                }
            )

place_limit_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.TTL)

Place a limit order for buying or selling an asset.

Parameters:

Name Type Description Default
action_type Actions

The type of action (LimitBuy/LimitSell).

required
current_price float

The current market price.

required
amount_pct float

The fraction of the free balance (LimitBuy) or of the held shares (LimitSell) to commit to the order.

required
price_modifier float

The multiplier applied to the current price to obtain the limit price (limit price = current price × price modifier).

required
current_step int

The current step in the trading environment.

required
tif OrderTIF

Time in Force for the order.

TTL

Returns:

Type Description
None

None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def place_limit_order(
    self,
    action_type: Actions,
    current_price: float,
    amount_pct: float,
    price_modifier: float,
    current_step: int,
    tif: OrderTIF = OrderTIF.TTL,  # Default to TTL to preserve previous behavior
) -> None:
    """
    Place a limit order for buying or selling an asset.

    The limit price is ``current_price * price_modifier``. An IOC order is
    filled right away at the limit price when the current price is at or
    through the limit, and is dropped otherwise. Any other time in force
    reserves the cash (buy) or the shares (sell) and queues the order in
    ``self.pending_orders``. Action types other than ``LimitBuy`` and
    ``LimitSell`` are ignored.

    Args:
        action_type (Actions): The type of action (LimitBuy/LimitSell).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the free balance (LimitBuy) or of the
            held shares (LimitSell) to commit. Values above 1.0 are treated
            as 1.0; values that yield no whole share place nothing.
        price_modifier (float): Multiplier applied to ``current_price`` to obtain the limit price.
        current_step (int): The current step in the trading environment.
        tif (OrderTIF): Time in Force for the order.

    Returns:
        None
    """
    limit_price = current_price * price_modifier

    # Cap the requested fraction at 100%, as execute_market_order does.
    # Without the cap a LimitSell sized above 1.0 would reserve more shares
    # than are held and leave units_held negative.
    if amount_pct > 1.0:
        amount_pct = 1.0

    # === Limit Buy Logic ===
    if action_type == Actions.LimitBuy:
        cost_per_share = limit_price * (1 + self.transaction_cost_pct)
        if cost_per_share <= 1e-9:
            return  # Avoid division by zero
        # Whole shares only: int() truncates the affordable quantity
        shares_to_buy = int((self.balance / cost_per_share) * amount_pct)
        if shares_to_buy > 0:
            cost_reserved = shares_to_buy * cost_per_share

            # Check balance
            if cost_reserved > self.balance:
                return

            # --- Handle IOC (Immediate or Cancel) ---
            if tif == OrderTIF.IOC:
                # If current price <= limit price, execute immediately
                if current_price <= limit_price:
                    # The fill is booked at the limit price, not the current price
                    execution_price = limit_price

                    self.balance -= cost_reserved
                    # Add shares (execution success)
                    self.units_held += shares_to_buy

                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_buy_executed_ioc",
                            "shares": shares_to_buy,
                            "price": execution_price,
                            "cost": cost_reserved,
                        }
                    )
                # If not executable, do nothing (cancel)
                return

            # --- Handle GTC / TTL (Pending) ---
            # Move the cash out of the free balance until the order resolves
            self.balance -= cost_reserved
            order = Order(
                type=OrderType.LIMIT_BUY,
                shares=shares_to_buy,
                price=limit_price,
                placed_at=current_step,
                reference_price=current_price,
                cost_reserved=cost_reserved,
                tif=tif,
            )
            self.pending_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_buy_placed",
                    "shares": shares_to_buy,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

    # === Limit Sell Logic ===
    elif action_type == Actions.LimitSell:
        if self.units_held <= 0:
            return
        shares_to_sell = int(self.units_held * amount_pct)
        if shares_to_sell > 0:

            # --- Handle IOC (Immediate or Cancel) ---
            if tif == OrderTIF.IOC:
                # If current price >= limit price, execute immediately
                if current_price >= limit_price:
                    execution_price = limit_price

                    # Calculate revenue
                    revenue = shares_to_sell * execution_price * (1 - self.transaction_cost_pct)

                    self.units_held -= shares_to_sell
                    self.balance += revenue

                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_sell_executed_ioc",
                            "shares": shares_to_sell,
                            "price": execution_price,
                            "revenue": revenue,
                        }
                    )
                # If not executable, do nothing (cancel)
                return

            # --- Handle GTC / TTL (Pending) ---
            # Move the shares out of the free position until the order resolves
            self.units_held -= shares_to_sell
            order = Order(
                type=OrderType.LIMIT_SELL,
                shares=shares_to_sell,
                price=limit_price,
                placed_at=current_step,
                reference_price=current_price,
                tif=tif,
            )
            self.pending_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_sell_placed",
                    "shares": shares_to_sell,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

place_risk_management_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.GTC)

Place a risk management order (stop loss or take profit).

Parameters:

Name Type Description Default
action_type Actions

The type of action (StopLoss/TakeProfit).

required
current_price float

The current market price.

required
amount_pct float

The fraction of the currently held shares to cover with the order.

required
price_modifier float

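The multiplier applied to the current price to obtain the trigger price; it is kept at or below 0.999 for a stop loss and at or above 1.001 for a take profit.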

required
current_step int

The current step in the trading environment.

required
tif OrderTIF

Time in Force. Only GTC and TTL are valid for Stop orders.

GTC

Returns:

Type Description
None

None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py
def place_risk_management_order(
    self,
    action_type: Actions,
    current_price: float,
    amount_pct: float,
    price_modifier: float,
    current_step: int,
    tif: OrderTIF = OrderTIF.GTC,  # Default to GTC (standard for stop loss)
) -> None:
    """
    Place a risk management order (stop loss or take profit).

    The covered shares are moved out of ``self.units_held`` and attached to
    a new order in ``self.stop_loss_orders`` or ``self.take_profit_orders``.
    The trigger price is ``current_price`` times a multiplier that is kept
    at or below 0.999 for a stop loss and at or above 1.001 for a take
    profit. Nothing is placed for IOC orders, when no shares are held, or
    when ``action_type`` is neither ``StopLoss`` nor ``TakeProfit``.

    Args:
        action_type (Actions): The type of action (StopLoss/TakeProfit).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the held shares to cover. Values above
            1.0 are treated as 1.0; values that yield no whole share place nothing.
        price_modifier (float): Multiplier applied to ``current_price`` to obtain the trigger price.
        current_step (int): The current step in the trading environment.
        tif (OrderTIF): Time in Force. Only GTC and TTL are valid for Stop orders.

    Returns:
        None
    """
    # Validate TIF for Stop orders
    if tif == OrderTIF.IOC:
        return  # IOC is invalid for Stop orders (must rest until trigger)

    if self.units_held <= 0:
        return

    # Cap the requested fraction at 100%. Without the cap a size above 1.0
    # would cover more shares than are held and leave units_held negative.
    if amount_pct > 1.0:
        amount_pct = 1.0

    # Whole shares only: int() truncates the covered quantity
    shares_to_cover = int(self.units_held * amount_pct)
    if shares_to_cover > 0:
        # === Stop Loss Logic ===
        if action_type == Actions.StopLoss:
            # Keep the trigger below the current price
            stop_price = current_price * min(0.999, price_modifier)
            if stop_price >= current_price:
                stop_price = current_price * 0.999

            # Covered shares leave the free position while the order rests
            self.units_held -= shares_to_cover

            order = Order(
                type=OrderType.STOP_LOSS, shares=shares_to_cover, price=stop_price, placed_at=current_step, tif=tif
            )
            self.stop_loss_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "stop_loss_placed",
                    "shares": shares_to_cover,
                    "price": stop_price,
                    "tif": tif.value,
                }
            )
        # === Take Profit Logic ===
        elif action_type == Actions.TakeProfit:
            # Keep the trigger above the current price
            take_profit_price = current_price * max(1.001, price_modifier)
            if take_profit_price <= current_price:
                take_profit_price = current_price * 1.001

            # Covered shares leave the free position while the order rests
            self.units_held -= shares_to_cover

            order = Order(
                type=OrderType.TAKE_PROFIT,
                shares=shares_to_cover,
                price=take_profit_price,
                placed_at=current_step,
                tif=tif,
            )
            self.take_profit_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "take_profit_placed",
                    "shares": shares_to_cover,
                    "price": take_profit_price,
                    "tif": tif.value,
                }
            )