Environments

Interfaces & Protocols

`interfaces`

`TradingEnvProtocol`

Bases: Protocol

Protocol defining the interface for trading environments.

Source code in src/quantrl_lab/environments/core/interfaces.py

class TradingEnvProtocol(Protocol):
    """
    Structural interface every trading environment must satisfy.

    Strategy classes are typed against this protocol, so any object that
    exposes the attributes and methods below can be handed to them.
    """

    # --- Required attributes ---
    data: np.ndarray
    current_step: int
    price_column_index: int
    window_size: int
    action_space: gym.Space
    observation_space: gym.Space

    # --- Required methods ---
    def step(self, action: Any) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """Apply ``action``; return (observation, reward, terminated, truncated, info)."""

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
        """Start a new episode; return (initial observation, info)."""

    def render(self, mode: str = "human"):
        """Render the current state using the requested ``mode``."""

    def close(self):
        """Release any resources held by the environment."""

`BaseActionStrategy`

Bases: ABC

An abstract base class for defining action spaces and handling agent actions.

Source code in src/quantrl_lab/environments/core/interfaces.py

class BaseActionStrategy(ABC):
    """
    Contract for objects that own an environment's action handling.

    A concrete strategy declares the action space and processes the
    actions an agent submits.
    """

    @abstractmethod
    def define_action_space(self) -> gym.spaces.Space:
        """
        Declare the action space the environment exposes.

        Returns:
            gym.spaces.Space: The action space for the environment.
        """
        ...

    @abstractmethod
    def handle_action(self, env_self: TradingEnvProtocol, action: Any) -> Tuple[Any, Dict[str, Any]]:
        """
        Process an action submitted by the agent.

        Args:
            env_self (TradingEnvProtocol): The environment the action is applied to.
            action (Any): The agent's action.

        Returns:
            Tuple[Any, Dict[str, Any]]: The outcome of the action taken in the environment.
        """
        ...

`define_action_space()` `abstractmethod`

Defines the action space for the environment.

Returns:

Type	Description
`Space`	The action space for the environment.

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def define_action_space(self) -> gym.spaces.Space:
    """
    Declare the action space the environment exposes.

    Returns:
        gym.spaces.Space: The action space for the environment.
    """
    ...

`handle_action(env_self, action)` `abstractmethod`

Handles the action taken by the agent in the environment.

Parameters:

Name	Type	Description	Default
`env_self`	`TradingEnvProtocol`	The environment instance where the action is taken.	required
`action`	`Any`	The action taken by the agent.	required

Returns:

Type	Description
`Tuple[Any, Dict[str, Any]]`	The outcome of the action taken in the environment.

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def handle_action(self, env_self: TradingEnvProtocol, action: Any) -> Tuple[Any, Dict[str, Any]]:
    """
    Process an action submitted by the agent.

    Args:
        env_self (TradingEnvProtocol): The environment the action is applied to.
        action (Any): The agent's action.

    Returns:
        Tuple[Any, Dict[str, Any]]: The outcome of the action taken in the environment.
    """
    ...

`BaseObservationStrategy`

Bases: ABC

Abstract base class for defining how an agent perceives the environment.

Source code in src/quantrl_lab/environments/core/interfaces.py

class BaseObservationStrategy(ABC):
    """
    Contract for objects that decide what the agent observes.

    A concrete strategy declares the observation space, builds the
    observation for the current step, and names each observed feature.
    """

    @abstractmethod
    def define_observation_space(self, env: TradingEnvProtocol) -> gym.spaces.Space:
        """
        Declare the observation space for ``env``.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            gym.spaces.Space: The observation space.
        """
        ...

    @abstractmethod
    def build_observation(self, env: TradingEnvProtocol) -> np.ndarray:
        """
        Assemble the observation vector for the environment's current state.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            np.ndarray: The observation vector.
        """
        ...

    @abstractmethod
    def get_feature_names(self, env: TradingEnvProtocol) -> List[str]:
        """
        Name every element of the flattened observation vector, in order.

        Args:
            env (TradingEnvProtocol): The trading environment.

        Returns:
            List[str]: Feature names matching the flattened observation
                element by element (e.g., ["Close_t-1", "RSI_t", ...]).
        """
        ...

`define_observation_space(env)` `abstractmethod`

Defines and returns the observation space for the environment.

Parameters:

Name	Type	Description	Default
`env`	`TradingEnvProtocol`	The trading environment.	required

Returns:

Type	Description
`Space`	The observation space.

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def define_observation_space(self, env: TradingEnvProtocol) -> gym.spaces.Space:
    """
    Declare the observation space for ``env``.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        gym.spaces.Space: The observation space.
    """
    ...

`build_observation(env)` `abstractmethod`

Builds the observation vector for the current state.

Parameters:

Name	Type	Description	Default
`env`	`TradingEnvProtocol`	The trading environment.	required

Returns:

Type	Description
`ndarray`	The observation vector.

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def build_observation(self, env: TradingEnvProtocol) -> np.ndarray:
    """
    Assemble the observation vector for the environment's current state.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        np.ndarray: The observation vector.
    """
    ...

`get_feature_names(env)` `abstractmethod`

Returns a list of feature names corresponding to the exact order of elements in the flattened observation vector.

Parameters:

Name	Type	Description	Default
`env`	`TradingEnvProtocol`	The trading environment.	required

Returns:

Type	Description
`List[str]`	A list of feature names (e.g., ["Close_t-1", "RSI_t", ...])

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def get_feature_names(self, env: TradingEnvProtocol) -> List[str]:
    """
    Name every element of the flattened observation vector, in order.

    Args:
        env (TradingEnvProtocol): The trading environment.

    Returns:
        List[str]: Feature names matching the flattened observation
            element by element (e.g., ["Close_t-1", "RSI_t", ...]).
    """
    ...

`BaseRewardStrategy`

Bases: ABC

Abstract base class for calculating rewards.

Source code in src/quantrl_lab/environments/core/interfaces.py

class BaseRewardStrategy(ABC):
    """
    Contract for objects that turn environment state into a reward.

    Subclasses must implement ``calculate_reward``; ``on_step_end`` is an
    optional hook that does nothing unless overridden.
    """

    @abstractmethod
    def calculate_reward(self, env: TradingEnvProtocol) -> float:
        """
        Compute the reward for the environment's current state.

        Args:
            env (TradingEnvProtocol): The trading environment instance.

        Returns:
            float: The calculated reward.

        Raises:
            NotImplementedError: Always, when this base implementation is called.
        """
        raise NotImplementedError("Subclasses should implement this method.")

    def on_step_end(self, env: TradingEnvProtocol):
        """Optional hook for updating internal state; the default is a no-op."""
        return None

`calculate_reward(env)` `abstractmethod`

Calculate the reward based on the action taken in the environment.

Parameters:

Name	Type	Description	Default
`env`	`TradingEnvProtocol`	The trading environment instance.	required

Returns:

Name	Type	Description
`float`	`float`	The calculated reward.

Source code in src/quantrl_lab/environments/core/interfaces.py

@abstractmethod
def calculate_reward(self, env: TradingEnvProtocol) -> float:
    """
    Compute the reward for the environment's current state.

    Args:
        env (TradingEnvProtocol): The trading environment instance.

    Returns:
        float: The calculated reward.

    Raises:
        NotImplementedError: Always, when this base implementation is called.
    """
    raise NotImplementedError("Subclasses should implement this method.")

`on_step_end(env)`

Optional: A hook to update any internal state if needed.

Source code in src/quantrl_lab/environments/core/interfaces.py

def on_step_end(self, env: TradingEnvProtocol):
    """Optional hook for updating internal state; the default is a no-op."""
    return None

Types

`types`

Configuration

`config`

`SimulationConfig`

Bases: BaseModel

Configuration for market simulation parameters.

Source code in src/quantrl_lab/environments/stock/components/config.py

class SimulationConfig(BaseModel):
    """Configuration for market simulation parameters."""

    # Per-transaction fee; validated to satisfy 0 <= value < 1. Default 0.001.
    # NOTE(review): described as a "percentage" but bounded like a fraction
    # (0.001 would then mean 0.1%) — confirm the unit against the portfolio code.
    transaction_cost_pct: float = Field(
        default=0.001, ge=0, lt=1, description="The percentage fee for each transaction."
    )
    # Slippage applied to market orders; validated to satisfy 0 <= value < 1. Default 0.001.
    slippage: float = Field(default=0.001, ge=0, lt=1, description="The slippage percentage for market orders.")
    # Lifetime of a pending order in environment steps; must be > 0. Default 5.
    order_expiration_steps: int = Field(
        default=5, gt=0, description="The number of steps before a pending order expires."
    )
    # Short-selling switch; off by default.
    # NOTE(review): the SingleStockTradingEnv constructor shown on this page does not
    # forward this flag to StockPortfolio — confirm where it is consumed.
    enable_shorting: bool = Field(default=False, description="Whether to allow short selling.")
    # Fee-bypass switch; off by default.
    # NOTE(review): likewise not forwarded by the constructor shown on this page — confirm.
    ignore_fees: bool = Field(default=False, description="Whether to ignore transaction costs.")

`RewardConfig`

Bases: BaseModel

Configuration for reward calculation parameters.

Source code in src/quantrl_lab/environments/stock/components/config.py

class RewardConfig(BaseModel):
    """Configuration for reward calculation parameters."""

    # (low, high) bounds; SingleStockTradingEnv.step unpacks this pair into np.clip
    # to bound the reward it returns. Default (-1.0, 1.0).
    clip_range: Tuple[float, float] = Field(default=(-1.0, 1.0), description="Range to clip the final reward.")

`SingleStockEnvConfig`

Bases: CoreEnvConfig

Stock environment configuration, extending the core environment configuration.

Source code in src/quantrl_lab/environments/stock/components/config.py

class SingleStockEnvConfig(CoreEnvConfig):
    """Stock environment configuration, extending the core environment
    configuration."""

    # Core Defaults
    # Starting cash handed to the portfolio.
    initial_balance: float = 100000.0
    # The data must have more rows than this, and each episode starts at this row index.
    window_size: int = 20
    # Price column used when the environment receives a numpy array and no
    # explicit `price_column` argument.
    price_column_index: int = 0

    # Components
    # default_factory gives each config instance its own nested model.
    simulation: SimulationConfig = Field(default_factory=SimulationConfig)
    rewards: RewardConfig = Field(default_factory=RewardConfig)

    # NOTE(review): a nested `Config` class is the pydantic v1 configuration style, while
    # `from_attributes` is the pydantic v2 option name (v1 called it `orm_mode`). Under v2
    # the class-based form is deprecated in favour of `model_config` — confirm the
    # pydantic version in use.
    class Config:
        from_attributes = True  # "ORM Mode"

SingleStockTradingEnv

`single`

`SingleStockTradingEnv`

Bases: Env

Source code in src/quantrl_lab/environments/stock/single.py

class SingleStockTradingEnv(gym.Env):
    """
    Gymnasium environment for trading a single stock over a 2D data array.

    Each row of ``data`` is one time step and each column one feature. Action
    handling, reward calculation and observation building are delegated to
    the strategy objects given to the constructor; balance, shares and open
    orders are tracked by ``self.portfolio``.
    """

    # Added metadata for Gymnasium compatibility
    metadata = {"render_modes": ["human", "ansi"], "render_fps": 4}

    def __init__(
        self,
        data: Union[pd.DataFrame, np.ndarray],  # DataFrame or numpy array of market data + features
        config: SingleStockEnvConfig,  # Configuration object for environment settings
        action_strategy: BaseActionStrategy,  # Strategy for defining action space and handling actions,
        reward_strategy: BaseRewardStrategy,  # Strategy for calculating rewards
        observation_strategy: BaseObservationStrategy,
        price_column: Optional[Union[str, int]] = None,  # Column name or index for price (auto-detected if None)
    ):
        """
        Build the environment from market data, a config and three strategies.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Market data plus features, one row per step.
                It is converted to a float32 array.
            config (SingleStockEnvConfig): Environment settings (window size, balance,
                simulation and reward parameters, optional episode length).
            action_strategy (BaseActionStrategy): Defines the action space and handles actions.
            reward_strategy (BaseRewardStrategy): Calculates the per-step reward.
            observation_strategy (BaseObservationStrategy): Defines the observation space and
                builds observations.
            price_column (Optional[Union[str, int]], optional): Price column name or index.
                For a DataFrame, None triggers auto-detection. For a numpy array only an
                integer is accepted, and None falls back to ``config.price_column_index``.

        Raises:
            ValueError: If the price column cannot be resolved or is out of bounds, if the
                data is not 2D, or if it does not have more rows than ``config.window_size``.
        """
        super().__init__()

        # === Handle DataFrame input with auto-detection ===
        if isinstance(data, pd.DataFrame):
            self.original_columns = data.columns.tolist()
            # Auto-detect price column if not specified
            if price_column is None:
                self.price_column_index = auto_detect_price_column(data)
            elif isinstance(price_column, str):
                if price_column not in data.columns:
                    raise ValueError(
                        f"Price column '{price_column}' not found in DataFrame. Available columns: {list(data.columns)}"
                    )
                self.price_column_index = data.columns.get_loc(price_column)
            elif isinstance(price_column, int):
                if not (0 <= price_column < len(data.columns)):
                    raise ValueError(
                        f"Price column index {price_column} out of bounds. DataFrame has {len(data.columns)} columns."
                    )
                self.price_column_index = price_column
            else:
                raise ValueError("price_column must be a string (column name), integer (index), or None (auto-detect)")

            # Auto-detect OHLC columns for better execution simulation
            self.open_column_index = detect_column_index(data, ["Open", "open"])
            self.high_column_index = detect_column_index(data, ["High", "high"])
            self.low_column_index = detect_column_index(data, ["Low", "low"])

            # Convert DataFrame to numpy array
            data_array = data.values.astype(np.float32)
        else:
            # Handle numpy array input: no column names, so no OHLC detection is possible
            self.original_columns = None
            self.open_column_index = None
            self.high_column_index = None
            self.low_column_index = None

            if price_column is None:
                if hasattr(config, "price_column_index") and config.price_column_index is not None:
                    self.price_column_index = config.price_column_index
                else:
                    raise ValueError("price_column must be provided when using numpy arrays")
            elif isinstance(price_column, int):
                self.price_column_index = price_column
            else:
                raise ValueError("price_column must be an integer index when using numpy arrays")

            data_array = data.astype(np.float32)

        # === Runtime error handling ===
        if data_array.ndim != 2:
            raise ValueError("Data must be a 2D array (num_steps, num_features).")
        if data_array.shape[0] <= config.window_size:
            raise ValueError("Data length must be greater than window_size.")
        if not (0 <= self.price_column_index < data_array.shape[1]):
            raise ValueError(f"price_column_index ({self.price_column_index}) is out of bounds.")

        # === Attributes ===
        self.Actions = Actions  # reference to the Actions class for easy access
        self.data = data_array  # Already converted to float32 above
        self.num_steps, self.num_features = self.data.shape
        self.window_size = config.window_size
        self._max_steps = self.num_steps - 1  # Max indexable step (data limit)

        # Set max episode steps - if None, use full data length
        self.max_episode_steps = config.max_episode_steps
        if self.max_episode_steps is None:
            self.max_episode_steps = self._max_steps - self.window_size + 1

        # Track episode steps separately from data steps
        self.episode_step = 0

        # Initialize the portfolio
        self.portfolio = StockPortfolio(
            initial_balance=config.initial_balance,
            transaction_cost_pct=config.simulation.transaction_cost_pct,
            slippage=config.simulation.slippage,
            order_expiration_steps=config.simulation.order_expiration_steps,
        )
        # TODO: consider other ways to handle expiration, e.g., GTC etc.

        # === Define the strategies for action, reward, and observation ===
        self.action_strategy = action_strategy
        self.reward_strategy = reward_strategy
        self.observation_strategy = observation_strategy
        # === Delegate the action space and observation space definitions to the strategies ===
        # This allows for more modular and flexible design, where each strategy can define its own logic
        # for actions and observations without cluttering the environment class.
        self.action_space = self.action_strategy.define_action_space()
        self.observation_space = self.observation_strategy.define_observation_space(self)

        # === Example action space values:
        # Market Buy 50% of available balance
        # [1.0, 0.5, 1.0]  # Action type 1, 50% amount, price modifier ignored

        # Limit Sell 75% of shares at 5% above market price
        # [4.0, 0.75, 1.05]  # Action type 4, 75% amount, 5% above price

        # Stop Loss 100% of shares at 10% below market price
        # [5.0, 1.0, 0.9]  # Action type 5, 100% amount, 10% below price
        # ================================================================

        # === Initialize some environment state variables ===
        self.reward_clip_range = config.rewards.clip_range
        self.prev_portfolio_value = 0.0
        self.action_type = None
        self.decoded_action_info = {}
        # Unclipped reward from the most recent step; reported in the info dict so the
        # reward strategy is only ever invoked once per step.
        self.last_raw_reward = 0.0
        self.current_step = 0

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]:
        """
        Execute one time step within the environment.

        Args:
            action (np.ndarray): The action to execute. Must be an ndarray whose shape
                equals ``self.action_space.shape``.

        Raises:
            ValueError: If the action is not valid, or if the environment is already at
                the last data row. Both checks run before any state is modified.

        Returns:
            Tuple[np.ndarray, float, bool, bool, Dict]: The observation, clipped reward,
                terminated, truncated, and info.
        """
        # === Input validation ===
        if not isinstance(action, np.ndarray) or action.shape != self.action_space.shape:
            raise ValueError(
                f"Invalid action format received in step: {action}. Expected shape {self.action_space.shape}"
            )

        # Refuse to run past the data BEFORE touching the portfolio, so a rejected call
        # leaves the environment exactly as it was.
        if self.current_step >= self._max_steps:
            raise ValueError("Cannot step beyond the maximum number of steps in the environment.")

        # === Step Execution ===

        # 1. Snapshot the portfolio value before anything changes in this step; the value is
        # stored on the instance as the "previous" value for this step.
        current_price = self._get_current_price()
        self.prev_portfolio_value = self.portfolio.get_value(current_price)

        # 2. Process open orders left over from earlier steps, passing the bar's
        # open/high/low when those columns were detected (None otherwise).
        bar = self.data[self.current_step]
        current_high = float(bar[self.high_column_index]) if self.high_column_index is not None else None
        current_low = float(bar[self.low_column_index]) if self.low_column_index is not None else None
        current_open = float(bar[self.open_column_index]) if self.open_column_index is not None else None

        self.portfolio.process_open_orders(
            self.current_step,
            current_price,
            current_high=current_high,
            current_low=current_low,
            current_open=current_open,
        )

        # 3. Handle the new action and STORE the results on `self`.
        # The reward strategies will access these via `env.action_type` and `env.decoded_action_info`.
        self.action_type, self.decoded_action_info = self.action_strategy.handle_action(self, action)

        # 4. Advance time.
        self.current_step += 1
        self.episode_step += 1

        # terminated: natural end of episode (reached end of data)
        # truncated: artificial time limit (max_episode_steps reached)
        terminated = self.current_step >= self._max_steps
        truncated = self.episode_step >= self.max_episode_steps

        # 5. Reward calculation is delegated to the reward strategy, exactly once per step.
        # The raw value is kept for the info dict; the returned reward is clipped.
        raw_reward = self.reward_strategy.calculate_reward(self)
        self.last_raw_reward = raw_reward
        reward = np.clip(raw_reward, *self.reward_clip_range).item()

        # 6. Let stateful reward strategies update their internal memory.
        self.reward_strategy.on_step_end(self)

        # 7. Build the next observation and the info dictionary.
        observation = self.observation_strategy.build_observation(self)
        info = self._build_info_dict()

        return observation, reward, terminated, truncated, info

    def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
        """
        Resets the environment to an initial state and returns the
        initial observation.

        Args:
            seed (Optional[int], optional): Random seed for reproducibility. Defaults to None.
            options (Optional[Dict], optional): Additional options for resetting the environment.
                Accepted but not read by this implementation. Defaults to None.

        Returns:
            Tuple[np.ndarray, Dict]: Initial observation and info dictionary.
        """
        super().reset(seed=seed)

        # 1. Start at `window_size` so a full look-back window of rows precedes the first step.
        self.current_step = self.window_size
        self.episode_step = 0

        # 2. Reset the portfolio for a new episode.
        self.portfolio.reset()

        # 3. Clear per-step state so nothing from the previous episode leaks into this one.
        # These are the same values __init__ assigns.
        self.action_type = None
        self.decoded_action_info = {}
        self.prev_portfolio_value = 0.0
        self.last_raw_reward = 0.0

        initial_observation = self.observation_strategy.build_observation(self)
        info = {
            "initial_balance": self.portfolio.initial_balance,
            "starting_step": self.current_step,
            "message": "Environment reset.",
        }
        return initial_observation, info

    def render(self, mode="human"):
        """
        Renders the environment state.

        Args:
            mode (str, optional): "human" prints to the console and returns None;
                "ansi" returns the state as a string. Any other value does nothing.
        """
        if mode == "ansi":
            return self._render_ansi()
        elif mode == "human":
            self._render_human()

    @staticmethod
    def _format_event_price(price_value: Any) -> str:
        """Format an event price with two decimals if it is an int or float, else ``str()`` it."""
        if isinstance(price_value, (int, float)):
            return f"{price_value:.2f}"
        return str(price_value)

    def _render_human(self):
        """Prints state information to the console."""
        current_price = self._get_current_price()
        portfolio_value = self.portfolio.get_value(current_price)
        total_shares = self.portfolio.total_shares

        print("-" * 40)
        print(f"Data Step:    {self.current_step}/{self._max_steps}")
        print(f"Episode Step: {self.episode_step}/{self.max_episode_steps}")
        print(f"Current Price:{current_price:>15.2f}")
        print(f"Balance:      {self.portfolio.balance:>15.2f}")
        print(f"Shares Held:  {self.portfolio.shares_held:>15} (Free)")
        print(f"Total Shares: {total_shares:>15} (Free + Reserved)")
        print(f"Portfolio Val:{portfolio_value:>15.2f}")
        print("-" * 40)
        print("Active Orders:")
        print(f" Pending Limit:{len(self.portfolio.pending_orders):>5}")
        print(f"  Stop Loss:    {len(self.portfolio.stop_loss_orders):>5}")
        print(f"  Take Profit:  {len(self.portfolio.take_profit_orders):>5}")

        if self.portfolio.executed_orders_history:
            last_event = self.portfolio.executed_orders_history[-1]
            print("-" * 40)
            # A missing price is shown as "None" here (no default is passed to .get).
            price_str = self._format_event_price(last_event.get("price"))

            print(
                f"Last Event:   {last_event['type']} "
                f"(Shares: {last_event.get('shares', 'N/A')}, "
                f"Price: {price_str})"
            )

        print("-" * 40)

    def _render_ansi(self) -> str:
        """Returns state information as a string."""
        current_price = self._get_current_price()
        portfolio_value = self.portfolio.get_value(current_price)
        total_shares = self.portfolio.total_shares
        last_event_str = "None"

        if self.portfolio.executed_orders_history:
            last_event = self.portfolio.executed_orders_history[-1]
            # A missing price is shown as "N/A" here.
            price_str = self._format_event_price(last_event.get("price", "N/A"))
            last_event_str = f"{last_event['type']} (S:{last_event.get('shares', 'N/A')}, P:{price_str})"

        return (
            f"Data Step: {self.current_step}/{self._max_steps} | "
            f"Episode Step: {self.episode_step}/{self.max_episode_steps} | "
            f"Price: {current_price:.2f} | "
            f"Balance: {self.portfolio.balance:.2f} | "
            f"Shares(F/T): {self.portfolio.shares_held}/{total_shares} | "
            f"Value: {portfolio_value:.2f} | "
            f"Orders(P/SL/TP): {len(self.portfolio.pending_orders)}/"
            f"{len(self.portfolio.stop_loss_orders)}/"
            f"{len(self.portfolio.take_profit_orders)} | "
            f"Last Event: {last_event_str}"
        )

    def close(self):
        """Print a closing message; this environment holds nothing else to release here."""
        print("SingleStockTradingEnv closed.")

    def _get_current_price(self) -> float:
        """
        Get the current price from the data array based on the current
        step.

        Returns:
            float: The price at the current step, or at the last row when the
                step is past the end of the data.

        Raises:
            ValueError: If the step is out of range and the data has no rows.
        """
        if 0 <= self.current_step < self.num_steps:
            return float(self.data[self.current_step, self.price_column_index])

        # Step is out of bounds (e.g., after done): fall back to the last known price.
        if self.num_steps > 0:
            last_valid_step = min(self.current_step, self.num_steps - 1)
            return float(self.data[last_valid_step, self.price_column_index])

        raise ValueError(
            f"No valid price data available at step {self.current_step} (data length: {self.num_steps})"
        )

    def _build_info_dict(self) -> Dict[str, Any]:
        """
        Builds an information dictionary for the current environment
        state.

        The "reward" entry is the unclipped reward stored by the most recent
        ``step`` call (0.0 before any step); the reward strategy is not
        invoked again here.

        Returns:
            Dict[str, Any]: A dictionary containing relevant information about the environment state.
        """
        current_price = self._get_current_price()
        history = self.portfolio.executed_orders_history
        return {
            "step": self.current_step,
            "episode_step": self.episode_step,
            "max_episode_steps": self.max_episode_steps,
            "portfolio_value": self.portfolio.get_value(current_price),
            "balance": self.portfolio.balance,
            "shares_held": self.portfolio.shares_held,
            "total_shares": self.portfolio.total_shares,
            "current_price": current_price,
            "reward": self.last_raw_reward,
            "action_decoded": self.decoded_action_info,
            "orders_info": {
                "pending_count": len(self.portfolio.pending_orders),
                "stop_loss_count": len(self.portfolio.stop_loss_orders),
                "take_profit_count": len(self.portfolio.take_profit_orders),
            },
            "last_order_event": history[-1] if history else None,
        }

`step(action)`

Execute one time step within the environment.

Parameters:

Name	Type	Description	Default
`action`	`ndarray`	The action to execute.	required

Raises:

Type	Description
`ValueError`	If the action is not valid.

Returns:

Type	Description
`Tuple[ndarray, float, bool, bool, Dict]`	The observation, reward, terminated, truncated, and info.

Source code in src/quantrl_lab/environments/stock/single.py

def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, bool, Dict]:
    """
    Execute one time step within the environment.

    Args:
        action (np.ndarray): The action to execute. Must be an ndarray whose shape
            equals ``self.action_space.shape``.

    Raises:
        ValueError: If the action is not valid, or if the environment is already at
            the last data row. Both checks run before any state is modified.

    Returns:
        Tuple[np.ndarray, float, bool, bool, Dict]: The observation, clipped reward,
            terminated, truncated, and info.
    """
    # === Input validation ===
    if not isinstance(action, np.ndarray) or action.shape != self.action_space.shape:
        raise ValueError(
            f"Invalid action format received in step: {action}. Expected shape {self.action_space.shape}"
        )

    # Refuse to run past the data BEFORE touching the portfolio, so a rejected call
    # leaves the environment exactly as it was.
    if self.current_step >= self._max_steps:
        raise ValueError("Cannot step beyond the maximum number of steps in the environment.")

    # === Step Execution ===

    # 1. Snapshot the portfolio value before anything changes in this step; the value is
    # stored on the instance as the "previous" value for this step.
    current_price = self._get_current_price()
    self.prev_portfolio_value = self.portfolio.get_value(current_price)

    # 2. Process open orders left over from earlier steps, passing the bar's
    # open/high/low when those columns were detected (None otherwise).
    current_high, current_low, current_open = None, None, None

    if self.high_column_index is not None:
        current_high = float(self.data[self.current_step, self.high_column_index])
    if self.low_column_index is not None:
        current_low = float(self.data[self.current_step, self.low_column_index])
    if self.open_column_index is not None:
        current_open = float(self.data[self.current_step, self.open_column_index])

    self.portfolio.process_open_orders(
        self.current_step,
        current_price,
        current_high=current_high,
        current_low=current_low,
        current_open=current_open,
    )

    # 3. Handle the new action and STORE the results on `self`.
    # The reward strategies will access these via `env.action_type` and `env.decoded_action_info`.
    self.action_type, self.decoded_action_info = self.action_strategy.handle_action(self, action)

    # 4. Advance time.
    self.current_step += 1
    self.episode_step += 1

    # terminated: natural end of episode (reached end of data)
    # truncated: artificial time limit (max_episode_steps reached)
    terminated = self.current_step >= self._max_steps
    truncated = self.episode_step >= self.max_episode_steps

    # 5. Reward calculation is delegated to the reward strategy, which receives `self`
    # and therefore has access to the environment's state.
    reward = self.reward_strategy.calculate_reward(self)

    # 6. Clip the final, combined reward to the configured range.
    reward = np.clip(reward, *self.reward_clip_range).item()

    # 7. Let stateful reward strategies update their internal memory.
    self.reward_strategy.on_step_end(self)

    # 8. Build the next observation.
    observation = self.observation_strategy.build_observation(self)

    # 9. Build the info dictionary describing the state after this step.
    info = self._build_info_dict()

    return observation, reward, terminated, truncated, info

`reset(*, seed=None, options=None)`

Resets the environment to an initial state and returns the initial observation.

Parameters:

Name	Type	Description	Default
`seed`	`Optional[int]`	Random seed for reproducibility.	`None`
`options`	`Optional[Dict]`	Additional options for resetting the environment.	`None`

Returns:

Type	Description
`Tuple[ndarray, Dict]`	Initial observation and info dictionary.

Source code in src/quantrl_lab/environments/stock/single.py

def reset(self, *, seed: Optional[int] = None, options: Optional[Dict] = None) -> Tuple[np.ndarray, Dict]:
    """
    Begin a new episode and return its first observation.

    Args:
        seed (Optional[int], optional): Seed forwarded to the parent class's ``reset``. Defaults to None.
        options (Optional[Dict], optional): Accepted for interface compatibility; this method
            does not read it. Defaults to None.

    Returns:
        Tuple[np.ndarray, Dict]: The initial observation and an info dictionary holding
            ``initial_balance``, ``starting_step`` and ``message``.
    """
    # Parent reset first; it receives the seed.
    super().reset(seed=seed)

    # The data cursor starts at `window_size` (so that many earlier rows exist
    # before the first step); the per-episode step counter starts from zero.
    self.current_step, self.episode_step = self.window_size, 0

    # Return the portfolio to its starting state for the new episode.
    self.portfolio.reset()

    # Build the first observation from the freshly reset state, then describe
    # the reset in the info dictionary.
    first_observation = self.observation_strategy.build_observation(self)
    reset_info = dict(
        initial_balance=self.portfolio.initial_balance,
        starting_step=self.current_step,
        message="Environment reset.",
    )
    return first_observation, reset_info

`render(mode='human')`

Renders the environment state.

Source code in src/quantrl_lab/environments/stock/single.py

def render(self, mode="human"):
    """
    Render the environment state in the requested mode.

    ``"ansi"`` returns whatever ``_render_ansi()`` produces. ``"human"`` calls
    ``_render_human()`` and returns None. Any other mode does nothing and
    returns None.
    """
    if mode == "human":
        self._render_human()
        return None
    if mode == "ansi":
        return self._render_ansi()
    return None

Portfolio

`portfolio`

`StockPortfolio`

Bases: Portfolio

A portfolio for stock trading that handles complex order types, fees, and slippage.

It extends the simple Portfolio with stock-specific logic and state.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

class StockPortfolio(Portfolio):
    """
    A portfolio for stock trading that handles complex order types,
    fees, and slippage.

    It extends the simple Portfolio with stock-specific logic and state.
    """

    def __init__(
        self,
        initial_balance: float,
        transaction_cost_pct: float,
        slippage: float,
        order_expiration_steps: int,
    ):
        """
        Store the trading-friction settings and start with empty order books.

        Args:
            initial_balance (float): Starting cash; forwarded to the parent constructor.
            transaction_cost_pct (float): Fee expressed as a fraction; fills are priced
                with ``(1 + transaction_cost_pct)`` on buys and ``(1 - transaction_cost_pct)`` on sells.
            slippage (float): Fractional price penalty applied to market and stop/take-profit fills.
            order_expiration_steps (int): Age, in steps, beyond which TTL orders expire.
        """
        # The parent class only needs the starting balance.
        super().__init__(initial_balance=initial_balance)

        # Open orders, one book per kind, held as typed Order objects.
        self.take_profit_orders: List[Order] = []
        self.stop_loss_orders: List[Order] = []
        self.pending_orders: List[Order] = []

        # Event log kept as plain dicts so entries of different shapes can coexist.
        self.executed_orders_history: List[Dict[str, Any]] = []

        # Friction and expiry settings used when orders are placed and filled.
        self.order_expiration_steps = order_expiration_steps
        self.slippage = slippage
        self.transaction_cost_pct = transaction_cost_pct

    def reset(self) -> None:
        """Run the parent reset, then empty every order book and the event history."""
        super().reset()
        # Rebind to brand-new lists rather than clearing in place; list objects
        # handed out earlier are left untouched.
        self.pending_orders, self.stop_loss_orders, self.take_profit_orders = [], [], []
        self.executed_orders_history = []

    @property
    def shares_held(self) -> int:
        """
        Stock-flavoured alias for ``units_held``.

        Shares reserved in open orders are not included, because placing those
        orders subtracts them from ``units_held``.

        Returns:
            int: The current value of ``units_held``.
        """
        free_shares = self.units_held
        return free_shares

    @property
    def total_shares(self) -> int:
        """
        Count every share owned: free shares plus shares locked in open orders.

        Returns:
            int: ``units_held`` plus the result of ``_get_reserved_shares()``.
        """
        free_shares = self.units_held
        locked_shares = self._get_reserved_shares()
        return free_shares + locked_shares

    def get_value(self, current_price: float) -> float:
        """
        Mark the whole portfolio to market, open orders included.

        Args:
            current_price (float): Price used to value every share.

        Returns:
            float: Free cash, plus free shares at ``current_price``, plus cash reserved by
                pending limit buys, plus reserved shares (limit sells, stop losses,
                take profits) at ``current_price``.
        """
        # Unreserved cash and unreserved shares.
        worth = self.balance + (self.units_held * current_price)

        # Cash set aside for limit buys that have not filled yet.
        for pending in self.pending_orders:
            if pending.type == OrderType.LIMIT_BUY:
                worth += pending.cost_reserved

        # Shares set aside for limit sells. This is a separate pass so the
        # additions happen in the same order as before.
        for pending in self.pending_orders:
            if pending.type == OrderType.LIMIT_SELL:
                worth += pending.shares * current_price

        # Shares set aside for stop-loss orders, then for take-profit orders.
        for book in (self.stop_loss_orders, self.take_profit_orders):
            for resting in book:
                worth += resting.shares * current_price

        return worth

    def process_open_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: Optional[float] = None,
        current_low: Optional[float] = None,
        current_open: Optional[float] = None,
    ) -> None:
        """
        Run one bar of order processing: pending limit orders first, then stop-loss/take-profit orders.

        Args:
            current_step (int): The current step in the trading environment.
            current_price (float): The current close price.
            current_high (Optional[float]): High price of the bar. Falls back to current_price when None.
            current_low (Optional[float]): Low price of the bar. Falls back to current_price when None.
            current_open (Optional[float]): Open price of the bar. Falls back to current_price when None.
        """
        # Callers that only have a close price get a flat bar (open = high = low = close).
        bar_high = current_price if current_high is None else current_high
        bar_low = current_price if current_low is None else current_low
        bar_open = current_price if current_open is None else current_open

        bar = (current_step, current_price, bar_high, bar_low, bar_open)
        self._process_pending_orders(*bar)
        self._process_risk_management_orders(*bar)

    def execute_market_order(
        self, action_type: Actions, current_price: float, amount_pct: float, current_step: int
    ) -> None:
        """
        Fill a market buy or sell immediately at the slippage-adjusted price.

        Args:
            action_type (Actions): ``Actions.Buy`` or ``Actions.Sell``.
            current_price (float): The current market price.
            amount_pct (float): Fraction of the free balance (buy) or free shares (sell)
                to trade. Clamped to [0, 1].
            current_step (int): The current step in the trading environment.

        Returns:
            None

        Raises:
            ValueError: If ``action_type`` is neither Buy nor Sell. A Buy with a
                non-positive balance returns silently before this check.
        """
        fraction = max(0.0, min(1.0, amount_pct))

        # A buy with no cash is a silent no-op.
        if self.balance <= 0 and action_type == Actions.Buy:
            return
        if action_type not in (Actions.Buy, Actions.Sell):
            raise ValueError("Invalid action type for market order")

        if action_type == Actions.Buy:
            # Buys pay up: slippage raises the price, then the fee is added on top.
            fill_price = current_price * (1 + self.slippage)
            unit_cost = fill_price * (1 + self.transaction_cost_pct)
            if unit_cost <= 1e-9:
                return  # Guards the division below

            quantity = int((self.balance / unit_cost) * fraction)
            if quantity <= 0:
                return

            total_cost = quantity * unit_cost
            if total_cost <= self.balance:
                self.balance -= total_cost
                self.units_held += quantity
                self.executed_orders_history.append(
                    {
                        "step": current_step,
                        "type": "market_buy",
                        "shares": quantity,
                        "price": fill_price,
                        "cost": total_cost,
                    }
                )
            return

        # Only Sell can reach this point.
        if self.units_held <= 0:
            return
        quantity = int(self.units_held * fraction)
        if quantity <= 0:
            return

        # Sells receive less: slippage lowers the price, then the fee is deducted.
        fill_price = current_price * (1 - self.slippage)
        proceeds = quantity * fill_price * (1 - self.transaction_cost_pct)
        self.units_held -= quantity
        self.balance += proceeds
        self.executed_orders_history.append(
            {
                "step": current_step,
                "type": "market_sell",
                "shares": quantity,
                "price": fill_price,
                "revenue": proceeds,
            }
        )

    def place_limit_order(
        self,
        action_type: Actions,
        current_price: float,
        amount_pct: float,
        price_modifier: float,
        current_step: int,
        tif: OrderTIF = OrderTIF.TTL,  # Default to TTL to preserve previous behavior
    ) -> None:
        """
        Place a limit order for buying or selling an asset.

        The limit price is ``current_price * price_modifier``. An IOC order either
        fills immediately at the limit price or is dropped. Any other time-in-force
        reserves the cash (buy) or the shares (sell) and queues the order in
        ``pending_orders``. Action types other than LimitBuy/LimitSell are ignored.

        Args:
            action_type (Actions): The type of action (LimitBuy/LimitSell).
            current_price (float): The current market price.
            amount_pct (float): Fraction of the free balance (buy) or free shares (sell)
                to commit. Clamped to [0, 1].
            price_modifier (float): Multiplier applied to the current price to get the limit price.
            current_step (int): The current step in the trading environment.
            tif (OrderTIF): Time in Force for the order.

        Returns:
            None
        """
        # Clamp the fraction the same way execute_market_order does. Without this,
        # an amount_pct above 1 on a limit sell would reserve more shares than are
        # held and push units_held negative.
        amount_pct = max(0.0, min(1.0, amount_pct))

        limit_price = current_price * price_modifier

        # === Limit Buy Logic ===
        if action_type == Actions.LimitBuy:
            cost_per_share = limit_price * (1 + self.transaction_cost_pct)
            if cost_per_share <= 1e-9:
                return  # Guards the division below
            shares_to_buy = int((self.balance / cost_per_share) * amount_pct)
            if shares_to_buy <= 0:
                return

            cost_reserved = shares_to_buy * cost_per_share
            if cost_reserved > self.balance:
                return  # Cannot afford the reservation

            # --- IOC: fill now or drop the order ---
            if tif == OrderTIF.IOC:
                if current_price <= limit_price:
                    # The fill is booked at the limit price, not the (possibly lower) market price.
                    execution_price = limit_price
                    self.balance -= cost_reserved
                    self.units_held += shares_to_buy
                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_buy_executed_ioc",
                            "shares": shares_to_buy,
                            "price": execution_price,
                            "cost": cost_reserved,
                        }
                    )
                return

            # --- GTC / TTL: reserve the cash and rest the order ---
            self.balance -= cost_reserved
            self.pending_orders.append(
                Order(
                    type=OrderType.LIMIT_BUY,
                    shares=shares_to_buy,
                    price=limit_price,
                    placed_at=current_step,
                    reference_price=current_price,
                    cost_reserved=cost_reserved,
                    tif=tif,
                )
            )
            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_buy_placed",
                    "shares": shares_to_buy,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

        # === Limit Sell Logic ===
        elif action_type == Actions.LimitSell:
            if self.units_held <= 0:
                return
            shares_to_sell = int(self.units_held * amount_pct)
            if shares_to_sell <= 0:
                return

            # --- IOC: fill now or drop the order ---
            if tif == OrderTIF.IOC:
                if current_price >= limit_price:
                    execution_price = limit_price
                    revenue = shares_to_sell * execution_price * (1 - self.transaction_cost_pct)
                    self.units_held -= shares_to_sell
                    self.balance += revenue
                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_sell_executed_ioc",
                            "shares": shares_to_sell,
                            "price": execution_price,
                            "revenue": revenue,
                        }
                    )
                return

            # --- GTC / TTL: reserve the shares and rest the order ---
            self.units_held -= shares_to_sell
            self.pending_orders.append(
                Order(
                    type=OrderType.LIMIT_SELL,
                    shares=shares_to_sell,
                    price=limit_price,
                    placed_at=current_step,
                    reference_price=current_price,
                    tif=tif,
                )
            )
            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_sell_placed",
                    "shares": shares_to_sell,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

    def place_risk_management_order(
        self,
        action_type: Actions,
        current_price: float,
        amount_pct: float,
        price_modifier: float,
        current_step: int,
        tif: OrderTIF = OrderTIF.GTC,  # Default to GTC (standard for stop loss)
    ) -> None:
        """
        Place a risk management order (stop loss or take profit).

        The covered shares are removed from ``units_held`` and held by the order
        until it executes or expires. Action types other than StopLoss/TakeProfit
        are ignored.

        Args:
            action_type (Actions): The type of action (StopLoss/TakeProfit).
            current_price (float): The current market price.
            amount_pct (float): Fraction of the free shares to cover. Clamped to [0, 1].
            price_modifier (float): Multiplier applied to the current price. It is capped at
                0.999 for a stop loss and floored at 1.001 for a take profit.
            current_step (int): The current step in the trading environment.
            tif (OrderTIF): Time in Force. IOC is rejected (the call does nothing).

        Returns:
            None
        """
        # A stop/target has to rest until it triggers, so IOC makes no sense here.
        if tif == OrderTIF.IOC:
            return

        if self.units_held <= 0:
            return

        # Clamp the fraction the same way execute_market_order does. Without this,
        # an amount_pct above 1 would reserve more shares than are held and push
        # units_held negative.
        amount_pct = max(0.0, min(1.0, amount_pct))

        shares_to_cover = int(self.units_held * amount_pct)
        if shares_to_cover <= 0:
            return

        # === Stop Loss Logic ===
        if action_type == Actions.StopLoss:
            # Keep the stop strictly below the market (at most 99.9% of it).
            stop_price = current_price * min(0.999, price_modifier)
            if stop_price >= current_price:
                # Only reachable when current_price is not positive.
                stop_price = current_price * 0.999

            self.units_held -= shares_to_cover
            self.stop_loss_orders.append(
                Order(
                    type=OrderType.STOP_LOSS, shares=shares_to_cover, price=stop_price, placed_at=current_step, tif=tif
                )
            )
            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "stop_loss_placed",
                    "shares": shares_to_cover,
                    "price": stop_price,
                    "tif": tif.value,
                }
            )

        # === Take Profit Logic ===
        elif action_type == Actions.TakeProfit:
            # Keep the target strictly above the market (at least 100.1% of it).
            take_profit_price = current_price * max(1.001, price_modifier)
            if take_profit_price <= current_price:
                # Only reachable when current_price is not positive.
                take_profit_price = current_price * 1.001

            self.units_held -= shares_to_cover
            self.take_profit_orders.append(
                Order(
                    type=OrderType.TAKE_PROFIT,
                    shares=shares_to_cover,
                    price=take_profit_price,
                    placed_at=current_step,
                    tif=tif,
                )
            )
            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "take_profit_placed",
                    "shares": shares_to_cover,
                    "price": take_profit_price,
                    "tif": tif.value,
                }
            )

    # === Private Helper Methods ===
    def _get_reserved_shares(self) -> int:
        """
        Add up the shares locked in stop-loss, take-profit and pending limit-sell orders.

        Pending limit buys are skipped: they reserve cash, not shares.

        Returns:
            int: The total number of shares reserved.
        """
        resting_limit_sells = [order for order in self.pending_orders if order.type == OrderType.LIMIT_SELL]
        books = (self.stop_loss_orders, self.take_profit_orders, resting_limit_sells)
        # One subtotal per book, combined left to right.
        from_stops, from_targets, from_limit_sells = (sum(order.shares for order in book) for book in books)
        return from_stops + from_targets + from_limit_sells

    def _process_pending_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: float,
        current_low: float,
        current_open: float,
    ) -> None:
        """
        Expire or fill resting limit orders against one bar.

        For each pending order, in queue order:

        * A TTL order older than ``order_expiration_steps`` is cancelled and its
          reserved cash (buy) or shares (sell) are returned.
        * A limit buy fills when ``current_low <= order.price``. It fills at the open
          if the bar opened below the limit, otherwise at the limit. Any over-reserved
          cash is refunded.
        * A limit sell fills when ``current_high >= order.price``. It fills at the open
          if the bar opened above the limit, otherwise at the limit.

        Orders that neither expire nor fill stay in ``pending_orders``. Every expiry
        and fill is appended to ``executed_orders_history``.

        Args:
            current_step (int): The current step in the trading environment.
            current_price (float): Close price of the bar. Not read by this method.
            current_high (float): High price of the bar.
            current_low (float): Low price of the bar.
            current_open (float): Open price of the bar.
        """
        still_open: List[Order] = []
        events = []

        for order in self.pending_orders:
            is_buy = order.type == OrderType.LIMIT_BUY
            is_sell = order.type == OrderType.LIMIT_SELL

            # --- Expiry (TTL orders only) takes precedence over a fill ---
            if order.tif == OrderTIF.TTL and current_step - order.placed_at > self.order_expiration_steps:
                if is_buy:
                    self.balance += order.cost_reserved
                elif is_sell:
                    self.units_held += order.shares
                events.append(
                    {
                        "step": current_step,
                        "type": f"{order.type.value}_expired",
                        "shares": order.shares,
                        "price": order.price,
                        "reason": "Expired",
                    }
                )
                continue

            # --- Limit buy: the bar traded at or below the limit ---
            if is_buy and current_low <= order.price:
                # A gap down through the limit fills at the (better) open.
                fill_price = current_open if current_open < order.price else order.price

                # The full limit cost was reserved at placement; hand back what was not needed.
                actual_cost = order.shares * fill_price * (1 + self.transaction_cost_pct)
                refund = order.cost_reserved - actual_cost
                if refund > 0:
                    self.balance += refund

                self.units_held += order.shares
                events.append(
                    {
                        "step": current_step,
                        "type": "limit_buy_executed",
                        "shares": order.shares,
                        "price": fill_price,
                        "reference_price": order.reference_price,
                        "cost": actual_cost,
                    }
                )
                continue

            # --- Limit sell: the bar traded at or above the limit ---
            if is_sell and current_high >= order.price:
                # A gap up through the limit fills at the (better) open.
                fill_price = current_open if current_open > order.price else order.price

                revenue = order.shares * fill_price * (1 - self.transaction_cost_pct)
                self.balance += revenue
                events.append(
                    {
                        "step": current_step,
                        "type": "limit_sell_executed",
                        "shares": order.shares,
                        "price": fill_price,
                        "reference_price": order.reference_price,
                        "revenue": revenue,
                    }
                )
                continue

            # Nothing happened to this order on this bar.
            still_open.append(order)

        # Swap in the surviving orders and log whatever happened.
        self.pending_orders = still_open
        if events:
            self.executed_orders_history.extend(events)

    def _process_risk_management_orders(
        self,
        current_step: int,
        current_price: float,
        current_high: float,
        current_low: float,
        current_open: float,
    ) -> None:
        """
        Expire or execute resting stop-loss and take-profit orders against one bar.

        Stop-loss orders are processed first, then take-profit orders. For each order:

        * A TTL order older than ``order_expiration_steps`` is cancelled and its
          shares are returned to ``units_held``.
        * A stop loss triggers when ``current_low <= order.price``. It fills at the
          stop price, or at the open when the bar opened below the stop.
        * A take profit triggers when ``current_high >= order.price``. It fills at
          the target price, or at the open when the bar opened above the target.

        Slippage and the transaction cost are deducted from each fill before the
        revenue is credited to ``balance``. Every expiry and fill is appended to
        ``executed_orders_history``.

        Args:
            current_step (int): The current step in the trading environment.
            current_price (float): Close price of the bar. Not read by this method.
            current_high (float): High price of the bar.
            current_low (float): Low price of the bar.
            current_open (float): Open price of the bar.
        """
        executed_order_details = []

        # === Process Stop Loss Orders ===
        remaining_stop_loss: List[Order] = []
        for order in self.stop_loss_orders:
            # Expiry applies to TTL orders only and takes precedence over a trigger.
            if order.tif == OrderTIF.TTL and current_step - order.placed_at > self.order_expiration_steps:
                self.units_held += order.shares
                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "stop_loss_expired",
                        "shares": order.shares,
                        "price": order.price,
                    }
                )
                continue

            # Not triggered: the bar never traded down to the stop.
            if current_low > order.price:
                remaining_stop_loss.append(order)
                continue

            # A gap down through the stop fills at the (worse) open.
            trigger_price = order.price
            fill_price = current_open if current_open < trigger_price else trigger_price

            adjusted_price = fill_price * (1 - self.slippage)
            revenue = order.shares * adjusted_price * (1 - self.transaction_cost_pct)
            self.balance += revenue

            executed_order_details.append(
                {
                    "step": current_step,
                    "type": "stop_loss_executed",
                    "shares": order.shares,
                    "trigger_price": trigger_price,
                    "execution_price": adjusted_price,
                    "revenue": revenue,
                }
            )
        self.stop_loss_orders = remaining_stop_loss

        # === Process Take Profit Orders ===
        remaining_take_profit: List[Order] = []
        for order in self.take_profit_orders:
            if order.tif == OrderTIF.TTL and current_step - order.placed_at > self.order_expiration_steps:
                self.units_held += order.shares
                executed_order_details.append(
                    {
                        "step": current_step,
                        "type": "take_profit_expired",
                        "shares": order.shares,
                        "price": order.price,
                    }
                )
                continue

            # Not triggered: the bar never traded up to the target.
            if current_high < order.price:
                remaining_take_profit.append(order)
                continue

            # A gap up through the target fills at the (better) open.
            trigger_price = order.price
            fill_price = current_open if current_open > trigger_price else trigger_price

            adjusted_price = fill_price * (1 - self.slippage)
            revenue = order.shares * adjusted_price * (1 - self.transaction_cost_pct)
            self.balance += revenue

            executed_order_details.append(
                {
                    "step": current_step,
                    "type": "take_profit_executed",
                    "shares": order.shares,
                    "trigger_price": trigger_price,
                    "execution_price": adjusted_price,
                    "revenue": revenue,
                }
            )
        self.take_profit_orders = remaining_take_profit

        # Log the collected events. They were previously built and then discarded,
        # so stop-loss and take-profit fills and expiries never reached the history.
        if executed_order_details:
            self.executed_orders_history.extend(executed_order_details)

`shares_held` `property`

Returns the number of shares currently held in the portfolio.

Returns:

Name	Type	Description
`int`	`int`	The number of shares held.

`total_shares` `property`

Returns the total number of shares held, including those reserved in orders.

Returns:

Name	Type	Description
`int`	`int`	The total number of shares held.

`reset()`

Reset the portfolio to its initial state.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def reset(self) -> None:
    """Run the parent reset, then empty every order book and the event history."""
    super().reset()
    # Rebind to brand-new lists rather than clearing in place; list objects
    # handed out earlier are left untouched.
    self.pending_orders, self.stop_loss_orders, self.take_profit_orders = [], [], []
    self.executed_orders_history = []

`get_value(current_price)`

Calculate the total value of the portfolio including unfilled orders and reserved money.

Parameters:

Name	Type	Description	Default
`current_price`	`float`	The current market price of the asset.	required

Returns:

Name	Type	Description
`float`	`float`	The total portfolio value including all positions and reserved amounts.

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def get_value(self, current_price: float) -> float:
    """
    Mark the whole portfolio to market, open orders included.

    Args:
        current_price (float): Price used to value every share.

    Returns:
        float: Free cash, plus free shares at ``current_price``, plus cash reserved by
            pending limit buys, plus reserved shares (limit sells, stop losses,
            take profits) at ``current_price``.
    """
    # Unreserved cash and unreserved shares.
    worth = self.balance + (self.units_held * current_price)

    # Cash set aside for limit buys that have not filled yet.
    for pending in self.pending_orders:
        if pending.type == OrderType.LIMIT_BUY:
            worth += pending.cost_reserved

    # Shares set aside for limit sells. This is a separate pass so the
    # additions happen in the same order as before.
    for pending in self.pending_orders:
        if pending.type == OrderType.LIMIT_SELL:
            worth += pending.shares * current_price

    # Shares set aside for stop-loss orders, then for take-profit orders.
    for book in (self.stop_loss_orders, self.take_profit_orders):
        for resting in book:
            worth += resting.shares * current_price

    return worth

`process_open_orders(current_step, current_price, current_high=None, current_low=None, current_open=None)`

Process all open orders using OHLC data for realistic execution.

Parameters:

Name	Type	Description	Default
`current_step`	`int`	The current step in the trading environment.	required
`current_price`	`float`	The current close price.	required
`current_high`	`Optional[float]`	High price of the bar. Defaults to current_price.	`None`
`current_low`	`Optional[float]`	Low price of the bar. Defaults to current_price.	`None`
`current_open`	`Optional[float]`	Open price of the bar. Defaults to current_price.	`None`

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def process_open_orders(
    self,
    current_step: int,
    current_price: float,
    current_high: Optional[float] = None,
    current_low: Optional[float] = None,
    current_open: Optional[float] = None,
) -> None:
    """
    Run one bar of order processing: pending limit orders first, then stop-loss/take-profit orders.

    Args:
        current_step (int): The current step in the trading environment.
        current_price (float): The current close price.
        current_high (Optional[float]): High price of the bar. Falls back to current_price when None.
        current_low (Optional[float]): Low price of the bar. Falls back to current_price when None.
        current_open (Optional[float]): Open price of the bar. Falls back to current_price when None.
    """
    # Callers that only have a close price get a flat bar (open = high = low = close).
    bar_high = current_price if current_high is None else current_high
    bar_low = current_price if current_low is None else current_low
    bar_open = current_price if current_open is None else current_open

    bar = (current_step, current_price, bar_high, bar_low, bar_open)
    self._process_pending_orders(*bar)
    self._process_risk_management_orders(*bar)

`execute_market_order(action_type, current_price, amount_pct, current_step)`

Execute a market order.

Parameters:

Name	Type	Description	Default
`action_type`	`Actions`	The type of action (buy/sell).	required
`current_price`	`float`	The current market price.	required
`amount_pct`	`float`	The percentage of the portfolio to use for the order.	required
`current_step`	`int`	The current step in the trading environment.	required

Returns:

Type	Description
`None`	None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def execute_market_order(
    self, action_type: Actions, current_price: float, amount_pct: float, current_step: int
) -> None:
    """
    Fill a market buy or sell immediately at the slippage-adjusted price.

    A buy spends up to ``amount_pct`` of the balance on whole shares; a sell
    disposes of up to ``amount_pct`` of the held units. Each fill is recorded
    in ``executed_orders_history``. Nothing happens when no whole share can
    be traded.

    Args:
        action_type (Actions): The type of action (buy/sell).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the balance (buy) or held units (sell)
            to use; clipped to [0.0, 1.0].
        current_step (int): The current step in the trading environment.

    Returns:
        None

    Raises:
        ValueError: If action_type is neither Actions.Buy nor Actions.Sell.
    """
    # Keep the requested fraction inside [0, 1].
    amount_pct = max(0.0, min(1.0, amount_pct))

    wants_buy = action_type == Actions.Buy

    # A buy with no cash is silently skipped rather than treated as an error.
    if wants_buy and self.balance <= 0:
        return
    if action_type not in (Actions.Buy, Actions.Sell):
        raise ValueError("Invalid action type for market order")

    if wants_buy:
        # Slippage raises the fill price; fees are added on top per share.
        fill_price = current_price * (1 + self.slippage)
        unit_cost = fill_price * (1 + self.transaction_cost_pct)
        if unit_cost <= 1e-9:
            return  # Guard the division below against a (near-)zero cost

        quantity = int((self.balance / unit_cost) * amount_pct)
        if quantity <= 0:
            return

        total_cost = quantity * unit_cost
        if total_cost <= self.balance:
            self.balance -= total_cost
            self.units_held += quantity
            fill_record = {
                "step": current_step,
                "type": "market_buy",
                "shares": quantity,
                "price": fill_price,
                "cost": total_cost,
            }
            self.executed_orders_history.append(fill_record)
        return

    if action_type == Actions.Sell:
        if self.units_held <= 0:
            return

        quantity = int(self.units_held * amount_pct)
        if quantity <= 0:
            return

        # Slippage lowers the fill price; fees are deducted from the proceeds.
        fill_price = current_price * (1 - self.slippage)
        proceeds = quantity * fill_price * (1 - self.transaction_cost_pct)
        self.units_held -= quantity
        self.balance += proceeds
        fill_record = {
            "step": current_step,
            "type": "market_sell",
            "shares": quantity,
            "price": fill_price,
            "revenue": proceeds,
        }
        self.executed_orders_history.append(fill_record)

`place_limit_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.TTL)`

Place a limit order for buying or selling an asset.

Parameters:

Name	Type	Description	Default
`action_type`	`Actions`	The type of action (LimitBuy/LimitSell).	required
`current_price`	`float`	The current market price.	required
`amount_pct`	`float`	Fraction of the available balance (LimitBuy) or of the held shares (LimitSell) to commit to the order.	required
`price_modifier`	`float`	The price modifier to apply to the current price.	required
`current_step`	`int`	The current step in the trading environment.	required
`tif`	`OrderTIF`	Time in Force for the order.	`TTL`

Returns:

Type	Description
`None`	None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def place_limit_order(
    self,
    action_type: Actions,
    current_price: float,
    amount_pct: float,
    price_modifier: float,
    current_step: int,
    tif: OrderTIF = OrderTIF.TTL,  # Default to TTL to preserve previous behavior
) -> None:
    """
    Place a limit order for buying or selling an asset.

    The limit price is ``current_price * price_modifier``. With
    ``OrderTIF.IOC`` the order either fills immediately at the limit price
    (when the current price already satisfies the limit) or is dropped.
    With any other TIF the order is appended to ``pending_orders`` and the
    cash (buy) or shares (sell) it needs are removed from ``balance`` /
    ``units_held`` at placement time.

    Action types other than LimitBuy/LimitSell are ignored.

    Args:
        action_type (Actions): The type of action (LimitBuy/LimitSell).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the balance (LimitBuy) or of the held
            units (LimitSell) to commit. Values outside [0.0, 1.0] are
            clamped to that range.
        price_modifier (float): The price modifier to apply to the current price.
        current_step (int): The current step in the trading environment.
        tif (OrderTIF): Time in Force for the order.

    Returns:
        None
    """
    # Clamp amount_pct to [0, 1], as execute_market_order does. Without this a
    # value above 1 lets a LimitSell commit more shares than are held and
    # drives units_held negative. Explicit comparisons (rather than min/max)
    # leave a NaN untouched so it still fails in int() below, as it did before.
    if amount_pct > 1.0:
        amount_pct = 1.0
    elif amount_pct < 0.0:
        amount_pct = 0.0

    limit_price = current_price * price_modifier

    # === Limit Buy Logic ===
    if action_type == Actions.LimitBuy:
        cost_per_share = limit_price * (1 + self.transaction_cost_pct)
        if cost_per_share <= 1e-9:
            return  # Avoid division by zero
        shares_to_buy = int((self.balance / cost_per_share) * amount_pct)
        if shares_to_buy > 0:
            cost_reserved = shares_to_buy * cost_per_share

            # Check balance
            if cost_reserved > self.balance:
                return

            # --- Handle IOC (Immediate or Cancel) ---
            if tif == OrderTIF.IOC:
                # If current price <= limit price, execute immediately
                if current_price <= limit_price:
                    # The fill is booked at the limit price, not the current price
                    execution_price = limit_price

                    self.balance -= cost_reserved
                    # Add shares (execution success)
                    self.units_held += shares_to_buy

                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_buy_executed_ioc",
                            "shares": shares_to_buy,
                            "price": execution_price,
                            "cost": cost_reserved,
                        }
                    )
                # If not executable, do nothing (cancel)
                return

            # --- Handle GTC / TTL (Pending) ---
            # Cash is taken out of the balance now and carried on the order
            self.balance -= cost_reserved
            order = Order(
                type=OrderType.LIMIT_BUY,
                shares=shares_to_buy,
                price=limit_price,
                placed_at=current_step,
                reference_price=current_price,
                cost_reserved=cost_reserved,
                tif=tif,
            )
            self.pending_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_buy_placed",
                    "shares": shares_to_buy,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

    # === Limit Sell Logic ===
    elif action_type == Actions.LimitSell:
        if self.units_held <= 0:
            return
        shares_to_sell = int(self.units_held * amount_pct)
        if shares_to_sell > 0:

            # --- Handle IOC (Immediate or Cancel) ---
            if tif == OrderTIF.IOC:
                # If current price >= limit price, execute immediately
                if current_price >= limit_price:
                    execution_price = limit_price

                    # Revenue is net of the transaction cost
                    revenue = shares_to_sell * execution_price * (1 - self.transaction_cost_pct)

                    self.units_held -= shares_to_sell
                    self.balance += revenue

                    self.executed_orders_history.append(
                        {
                            "step": current_step,
                            "type": "limit_sell_executed_ioc",
                            "shares": shares_to_sell,
                            "price": execution_price,
                            "revenue": revenue,
                        }
                    )
                # If not executable, do nothing (cancel)
                return

            # --- Handle GTC / TTL (Pending) ---
            # Shares are taken out of units_held now and carried on the order
            self.units_held -= shares_to_sell
            order = Order(
                type=OrderType.LIMIT_SELL,
                shares=shares_to_sell,
                price=limit_price,
                placed_at=current_step,
                reference_price=current_price,
                tif=tif,
            )
            self.pending_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "limit_sell_placed",
                    "shares": shares_to_sell,
                    "price": limit_price,
                    "tif": tif.value,
                }
            )

`place_risk_management_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.GTC)`

Place a risk management order (stop loss or take profit).

Parameters:

Name	Type	Description	Default
`action_type`	`Actions`	The type of action (StopLoss/TakeProfit).	required
`current_price`	`float`	The current market price.	required
`amount_pct`	`float`	Fraction of the currently held shares that the order should cover.	required
`price_modifier`	`float`	The price modifier to apply to the current price.	required
`current_step`	`int`	The current step in the trading environment.	required
`tif`	`OrderTIF`	Time in Force. Only GTC and TTL are valid for Stop orders.	`GTC`

Returns:

Type	Description
`None`	None

Source code in src/quantrl_lab/environments/stock/components/portfolio.py

def place_risk_management_order(
    self,
    action_type: Actions,
    current_price: float,
    amount_pct: float,
    price_modifier: float,
    current_step: int,
    tif: OrderTIF = OrderTIF.GTC,  # Default to GTC (standard for stop loss)
) -> None:
    """
    Place a risk management order (stop loss or take profit).

    The covered shares are removed from ``units_held`` when the order is
    placed and carried on the resulting ``Order``, which is appended to
    ``stop_loss_orders`` or ``take_profit_orders``. The placement is also
    recorded in ``executed_orders_history``.

    The request is ignored when ``tif`` is IOC, when no units are held, when
    the fraction rounds down to zero shares, or when ``action_type`` is
    neither StopLoss nor TakeProfit.

    Args:
        action_type (Actions): The type of action (StopLoss/TakeProfit).
        current_price (float): The current market price.
        amount_pct (float): Fraction of the held units to cover. Values
            outside [0.0, 1.0] are clamped to that range.
        price_modifier (float): The price modifier to apply to the current
            price. Capped at 0.999 for a stop loss and floored at 1.001 for
            a take profit.
        current_step (int): The current step in the trading environment.
        tif (OrderTIF): Time in Force. Only GTC and TTL are valid for Stop orders.

    Returns:
        None
    """
    # Validate TIF for Stop orders
    if tif == OrderTIF.IOC:
        return  # IOC is invalid for Stop orders (must rest until trigger)

    if self.units_held <= 0:
        return

    # Clamp amount_pct to [0, 1], as execute_market_order does. Without this a
    # value above 1 covers more shares than are held and drives units_held
    # negative. Explicit comparisons (rather than min/max) leave a NaN
    # untouched so it still fails in int() below, as it did before.
    if amount_pct > 1.0:
        amount_pct = 1.0
    elif amount_pct < 0.0:
        amount_pct = 0.0

    shares_to_cover = int(self.units_held * amount_pct)
    if shares_to_cover > 0:
        # === Stop Loss Logic ===
        if action_type == Actions.StopLoss:
            # The stop is placed at most at 99.9% of the current price
            stop_price = current_price * min(0.999, price_modifier)
            if stop_price >= current_price:
                stop_price = current_price * 0.999

            # Covered shares leave the freely held position
            self.units_held -= shares_to_cover

            order = Order(
                type=OrderType.STOP_LOSS, shares=shares_to_cover, price=stop_price, placed_at=current_step, tif=tif
            )
            self.stop_loss_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "stop_loss_placed",
                    "shares": shares_to_cover,
                    "price": stop_price,
                    "tif": tif.value,
                }
            )
        # === Take Profit Logic ===
        elif action_type == Actions.TakeProfit:
            # The target is placed at least at 100.1% of the current price
            take_profit_price = current_price * max(1.001, price_modifier)
            if take_profit_price <= current_price:
                take_profit_price = current_price * 1.001

            # Covered shares leave the freely held position
            self.units_held -= shares_to_cover

            order = Order(
                type=OrderType.TAKE_PROFIT,
                shares=shares_to_cover,
                price=take_profit_price,
                placed_at=current_step,
                tif=tif,
            )
            self.take_profit_orders.append(order)

            self.executed_orders_history.append(
                {
                    "step": current_step,
                    "type": "take_profit_placed",
                    "shares": shares_to_cover,
                    "price": take_profit_price,
                    "tif": tif.value,
                }
            )

Environments

Interfaces & Protocols

interfaces

TradingEnvProtocol

BaseActionStrategy

define_action_space() abstractmethod

handle_action(env_self, action) abstractmethod

BaseObservationStrategy

define_observation_space(env) abstractmethod

build_observation(env) abstractmethod

get_feature_names(env) abstractmethod

BaseRewardStrategy

calculate_reward(env) abstractmethod

on_step_end(env)

Types

types

Configuration

config

SimulationConfig

RewardConfig

SingleStockEnvConfig

SingleStockTradingEnv

single

SingleStockTradingEnv

step(action)

reset(*, seed=None, options=None)

render(mode='human')

Portfolio

portfolio

StockPortfolio

shares_held property

total_shares property

reset()

get_value(current_price)

process_open_orders(current_step, current_price, current_high=None, current_low=None, current_open=None)

execute_market_order(action_type, current_price, amount_pct, current_step)

place_limit_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.TTL)

place_risk_management_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.GTC)

`interfaces`

`TradingEnvProtocol`

`BaseActionStrategy`

`define_action_space()` `abstractmethod`

`handle_action(env_self, action)` `abstractmethod`

`BaseObservationStrategy`

`define_observation_space(env)` `abstractmethod`

`build_observation(env)` `abstractmethod`

`get_feature_names(env)` `abstractmethod`

`BaseRewardStrategy`

`calculate_reward(env)` `abstractmethod`

`on_step_end(env)`

`types`

`config`

`SimulationConfig`

`RewardConfig`

`SingleStockEnvConfig`

`single`

`SingleStockTradingEnv`

`step(action)`

`reset(*, seed=None, options=None)`

`render(mode='human')`

`portfolio`

`StockPortfolio`

`shares_held` `property`

`total_shares` `property`

`reset()`

`get_value(current_price)`

`process_open_orders(current_step, current_price, current_high=None, current_low=None, current_open=None)`

`execute_market_order(action_type, current_price, amount_pct, current_step)`

`place_limit_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.TTL)`

`place_risk_management_order(action_type, current_price, amount_pct, price_modifier, current_step, tif=OrderTIF.GTC)`