Source code for prob_spaces.multi_discrete

"""Module for probability distributions over MultiDiscrete spaces."""

from typing import Any

import numpy as np
import torch as th
from gymnasium import spaces
from numpy.typing import NDArray

from prob_spaces.dists.categorical import CategoricalDist, MaskedCategorical


class MultiDiscreteDist(spaces.MultiDiscrete):
    """Probability distribution for MultiDiscrete spaces.

    Extends ``gymnasium.spaces.MultiDiscrete`` so that an instance can be
    called with a probability tensor to build a masked categorical
    distribution over the space.

    Attributes
    ----------
    internal_mask : NDArray[np.bool_]
        Boolean array of shape ``(*nvec.shape, prob_last_dim)`` computed once
        at construction time by :meth:`_internal_mask`.
    """

    def __init__(
        self,
        nvec: NDArray[np.integer[Any]] | list[int],
        dtype: str | type[np.integer[Any]] = np.int64,
        seed: int | np.random.Generator | None = None,
        start: NDArray[np.integer[Any]] | list[int] | None = None,
    ) -> None:
        """Initialize MultiDiscreteDist with nvec, dtype, seed, and start.

        All arguments are forwarded positionally to
        ``spaces.MultiDiscrete.__init__``; the internal validity mask is then
        precomputed from the resulting ``self.nvec``.
        """
        super().__init__(nvec, dtype, seed, start)
        self.internal_mask = self._internal_mask()

    @property
    def prob_last_dim(self) -> int:
        """Return the last dimension size for probability tensors.

        This is ``max(nvec) + 1``, i.e. one more slot than the largest entry
        of ``nvec``.
        """
        return int(np.max(self.nvec)) + 1

    def _internal_mask(self) -> NDArray[np.bool_]:
        """Return internal mask for valid actions in MultiDiscrete space.

        Entry ``[..., k]`` is ``True`` exactly when ``k < abs(nvec[...])``.
        Because the last axis has ``max(nvec) + 1`` slots, for non-negative
        ``nvec`` the final slot is ``False`` for every component.

        Returns
        -------
        NDArray[np.bool_]
            Boolean array of shape ``(*nvec.shape, prob_last_dim)`` indicating
            valid actions in the MultiDiscrete space.
        """
        # Broadcasting (*nvec.shape, 1) against (prob_last_dim,) produces the
        # full mask in one comparison; the result dtype is NumPy's boolean
        # type without going through the ``np.bool`` alias, which does not
        # exist on NumPy 1.24-1.26.
        limits = np.abs(self.nvec)[..., np.newaxis]
        slots = np.arange(self.prob_last_dim)
        return slots < limits

    def __call__(self, prob: th.Tensor, mask: th.Tensor | None = None) -> MaskedCategorical:
        """Build a masked categorical distribution from a probability tensor.

        ``prob`` is reshaped to ``(*nvec.shape, prob_last_dim)``. The optional
        ``mask`` is combined (logical AND) with the precomputed internal mask,
        and the result is passed to ``CategoricalDist`` together with the
        space's ``start``.

        Parameters
        ----------
        prob : th.Tensor
            Tensor of probabilities; must be reshapeable to
            ``(*nvec.shape, prob_last_dim)``.
        mask : th.Tensor, optional
            Additional mask to AND with the internal mask. When omitted, only
            the internal mask is applied. Defaults to None.

        Returns
        -------
        MaskedCategorical
            The object produced by ``CategoricalDist(probs, mask=..., start=...)``.
            NOTE(review): the annotation is kept from the original signature;
            confirm it matches what ``CategoricalDist`` actually returns.
        """
        probs = prob.reshape(*self.nvec.shape, self.prob_last_dim)
        internal = th.tensor(self.internal_mask, dtype=th.bool, device=probs.device)
        # With no caller mask the combined mask is just the internal one, so
        # there is no need to allocate an all-True tensor to AND against.
        combined = internal if mask is None else th.logical_and(mask, internal)
        return CategoricalDist(probs, mask=combined, start=self.start)

    @classmethod
    def from_space(cls, space: spaces.MultiDiscrete) -> "MultiDiscreteDist":
        """Create a MultiDiscreteDist from a gymnasium MultiDiscrete space.

        Only ``nvec``, ``dtype`` and ``start`` are copied from ``space``; no
        seed is passed, so ``seed`` takes its default of ``None``.

        Parameters
        ----------
        space : spaces.MultiDiscrete
            The gymnasium space to mirror.

        Returns
        -------
        MultiDiscreteDist
            An instance of MultiDiscreteDist created from the given gymnasium
            MultiDiscrete space.
        """
        return cls(nvec=space.nvec, dtype=space.dtype, start=space.start)  # type: ignore