import numpy as np

from gym import Env, spaces
from gym.utils import seeding
from gym.envs.toy_text.utils import categorical_sample


class DiscreteEnv(Env):
    """Environment with discrete states and actions driven by a transition table.

    Members:
        nS: number of states.
        nA: number of actions.
        P: transitions; a dictionary of lists where ``P[s][a]`` is
            ``[(probability, nextstate, reward, done), ...]``.
        isd: initial state distribution, a list or array of length ``nS``.
        s: the current state.
        lastaction: the action given to the latest ``step`` call, or ``None``
            when no step has been taken since construction or ``reset``
            (kept for rendering).
    """

    def __init__(self, nS, nA, P, isd):
        """Store the model, build the spaces, seed the RNG and draw a first state."""
        self.nS = nS
        self.nA = nA
        self.P = P
        self.isd = isd
        self.lastaction = None  # for rendering

        self.action_space = spaces.Discrete(self.nA)
        self.observation_space = spaces.Discrete(self.nS)

        # ``self.np_random`` is created by ``seed``; it has to exist before
        # the initial state can be sampled from ``isd``.
        self.seed()
        self.s = categorical_sample(self.isd, self.np_random)

    def seed(self, seed=None):
        """Replace ``self.np_random`` with a generator built from ``seed``.

        Returns:
            A one-element list holding the seed value reported by
            ``seeding.np_random``.
        """
        rng, used_seed = seeding.np_random(seed)
        self.np_random = rng
        return [used_seed]

    def reset(self):
        """Sample a new current state from ``isd`` and clear ``lastaction``.

        Returns:
            The new state converted with ``int``.
        """
        start_state = categorical_sample(self.isd, self.np_random)
        self.s = start_state
        self.lastaction = None
        return int(start_state)

    def step(self, a):
        """Apply action ``a`` in the current state.

        One entry of ``P[self.s][a]`` is selected by passing the entries'
        probabilities to ``categorical_sample``; the selected entry's next
        state becomes the current state and ``a`` is recorded as
        ``lastaction``.

        Returns:
            ``(state, reward, done, info)`` where ``state`` is the next state
            converted with ``int`` and ``info`` is ``{"prob": p}``, with ``p``
            the probability stored in the selected entry.
        """
        outcomes = self.P[self.s][a]
        weights = [outcome[0] for outcome in outcomes]
        chosen = categorical_sample(weights, self.np_random)
        prob, next_state, reward, done = outcomes[chosen]

        self.s = next_state
        self.lastaction = a
        return (int(next_state), reward, done, {"prob": prob})