From 49529a9400fb06d8992d03fe8d4d4351797b3004 Mon Sep 17 00:00:00 2001 From: Jerry Wu Date: Thu, 26 Oct 2023 01:43:56 +0800 Subject: [PATCH] environment test --- agent/__init__.py | 1 + agent/util.py | 12 ++ document/FlashcardDocument.py | 19 ++ document/FlashcardDocumentSampler.py | 14 ++ document/__init__.py | 1 + main.py | 39 ++++ recsim_environment.py | 309 +++++++++++++++++++++++++++ user/FlashcardUserModel.py | 65 ++++++ user/UserResponse.py | 15 ++ user/UserSampler.py | 15 ++ user/UserState.py | 26 +++ user/__init__.py | 2 + util/__init__.py | 1 + util/util.py | 35 +++ 14 files changed, 554 insertions(+) create mode 100644 agent/__init__.py create mode 100644 agent/util.py create mode 100644 document/FlashcardDocument.py create mode 100644 document/FlashcardDocumentSampler.py create mode 100644 document/__init__.py create mode 100644 main.py create mode 100644 recsim_environment.py create mode 100644 user/FlashcardUserModel.py create mode 100644 user/UserResponse.py create mode 100644 user/UserSampler.py create mode 100644 user/UserState.py create mode 100644 user/__init__.py create mode 100644 util/__init__.py create mode 100644 util/util.py diff --git a/agent/__init__.py b/agent/__init__.py new file mode 100644 index 0000000..e8df281 --- /dev/null +++ b/agent/__init__.py @@ -0,0 +1 @@ +from .util import create_create_agent \ No newline at end of file diff --git a/agent/util.py b/agent/util.py new file mode 100644 index 0000000..4534b20 --- /dev/null +++ b/agent/util.py @@ -0,0 +1,12 @@ +from recsim.agents import full_slate_q_agent + +def create_create_agent(agent=full_slate_q_agent.FullSlateQAgent): + def create_agent(sess, environment, eval_mode, summary_writer=None): + kwargs = { + 'observation_space': environment.observation_space, + 'action_space': environment.action_space, + 'summary_writer': summary_writer, + 'eval_mode': eval_mode, + } + return agent(sess, **kwargs) + return create_agent \ No newline at end of file diff --git a/document/FlashcardDocument.py b/document/FlashcardDocument.py new file mode 100644 index 0000000..41d9567 --- /dev/null +++ b/document/FlashcardDocument.py @@ -0,0 +1,19 @@ +from recsim import document +from gym import spaces +import numpy as np + +class FlashcardDocument(document.AbstractDocument): + def __init__(self, doc_id, difficulty): + self.base_difficulty = difficulty + # doc_id is an integer representing the unique ID of this document + super(FlashcardDocument, self).__init__(doc_id) + + def create_observation(self): + return np.array(self.base_difficulty) + + @staticmethod + def observation_space(): + return spaces.Box(shape=(1,3), dtype=np.float32, low=0.0, high=1.0) + + def __str__(self): + return "Flashcard {} with difficulty {}.".format(self._doc_id, self.base_difficulty) diff --git a/document/FlashcardDocumentSampler.py b/document/FlashcardDocumentSampler.py new file mode 100644 index 0000000..8a8d872 --- /dev/null +++ b/document/FlashcardDocumentSampler.py @@ -0,0 +1,14 @@ +from .FlashcardDocument import FlashcardDocument +from recsim import document + +class FlashcardDocumentSampler(document.AbstractDocumentSampler): + def __init__(self, doc_ctor=FlashcardDocument, **kwargs): + super(FlashcardDocumentSampler, self).__init__(doc_ctor, **kwargs) + self._doc_count = 0 + + def sample_document(self): + doc_features = {} + doc_features['doc_id'] = self._doc_count + doc_features['difficulty'] = self._rng.random_sample((1, 3)) + self._doc_count += 1 + return self._doc_ctor(**doc_features) \ No newline at end of file diff --git 
a/document/__init__.py b/document/__init__.py new file mode 100644 index 0000000..8ac8fe3 --- /dev/null +++ b/document/__init__.py @@ -0,0 +1 @@ +from .FlashcardDocumentSampler import FlashcardDocumentSampler \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..7650847 --- /dev/null +++ b/main.py @@ -0,0 +1,39 @@ +import tensorflow as tf +from recsim.simulator import environment +from user import FlashcardUserModel +from document import FlashcardDocumentSampler +from recsim.simulator import recsim_gym +from recsim.agents import full_slate_q_agent +from recsim.simulator import runner_lib +from agent import create_create_agent +from util import reward, update_metrics + +slate_size = 1 +num_candidates = 10 +time_budget = 60 + +tf.compat.v1.disable_eager_execution() + +create_agent_fn = create_create_agent(full_slate_q_agent.FullSlateQAgent) + +ltsenv = environment.Environment( + FlashcardUserModel(num_candidates, time_budget, slate_size), + FlashcardDocumentSampler(), + num_candidates, + slate_size, + resample_documents=False) + +lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, reward, update_metrics) +lts_gym_env.reset() + +tmp_base_dir = './recsim/' +runner = runner_lib.TrainRunner( + base_dir=tmp_base_dir, + create_agent_fn=create_agent_fn, + env=lts_gym_env, + episode_log_file="", + max_training_steps=5, + num_iterations=1 +) + +runner.run_experiment() \ No newline at end of file diff --git a/recsim_environment.py b/recsim_environment.py new file mode 100644 index 0000000..42532ed --- /dev/null +++ b/recsim_environment.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- +"""RecSim Environment + +Automatically generated by Colaboratory. + +Original file is located at + https://colab.research.google.com/drive/1KJbwKa0URSOU9B7GsDAkYOoFAoU5g14Y +""" + +!pip install --upgrade --no-cache-dir recsim + +#@title Generic imports +import numpy as np +from gym import spaces +import matplotlib.pyplot as plt +from scipy import stats + +#@title RecSim imports +from recsim import document +from recsim import user +from recsim.choice_model import MultinomialLogitChoiceModel +from recsim.simulator import environment +from recsim.simulator import recsim_gym + +# disable eager execution to avoid error +import tensorflow as tf +tf.compat.v1.disable_eager_execution() + +"""# Flashcard Learning Environment Build +## Documents (Flashcards) + - difficulty (w) + - deadline + - other features? 
+ +### Document Model +### Sampler + +## Users +### User State and Transition +**static** +- learning ability + +**dynamic** + - recall history (#correct, #wrong) + +### Sampler + +### User Choice Model + - user has no choice but to review the card agent provides + +### User Response + - user's self evaluation (remember or not) -> update history + +## Reward (From User Response) + - gain = maximum additional retention rate if the card is chosen + - time factor = α * sqrt(lnδ/n_t) +""" + +slate_size = 1 +num_candidates = 10 + +class FlashcardDocument(document.AbstractDocument): + def __init__(self, doc_id, difficulty): + self.base_difficulty = difficulty + # doc_id is an integer representing the unique ID of this document + super(FlashcardDocument, self).__init__(doc_id) + + def create_observation(self): + return np.array(self.base_difficulty) + + @staticmethod + def observation_space(): + return spaces.Box(shape=(1,3), dtype=np.float32, low=0.0, high=1.0) + + def __str__(self): + return "Flashcard {} with difficulty {}.".format(self._doc_id, self.base_difficulty) + +class FlashcardDocumentSampler(document.AbstractDocumentSampler): + def __init__(self, doc_ctor=FlashcardDocument, **kwargs): + super(FlashcardDocumentSampler, self).__init__(doc_ctor, **kwargs) + self._doc_count = 0 + + def sample_document(self): + doc_features = {} + doc_features['doc_id'] = self._doc_count + doc_features['difficulty'] = self._rng.random_sample((1, 3)) + self._doc_count += 1 + return self._doc_ctor(**doc_features) + +class UserState(user.AbstractUserState): + def __init__(self, num_candidates, time_budget): + self._cards = num_candidates + self._history = np.zeros((num_candidates, 3)) + self._last_review = np.zeros((num_candidates,)) + self._time_budget = time_budget + self._time = 0 + self._W = np.zeros((num_candidates, 3)) + super(UserState, self).__init__() + def create_observation(self): + return {'history': self._history, 'last_review': self._last_review, 'time': self._time, 'time_budget': self._time_budget} + + @staticmethod + def observation_space(): + return spaces.Dict({ + 'history': spaces.Box(shape=(num_candidates, 3), low=0, high=np.inf, dtype=int), + 'last_review': spaces.Box(shape=(num_candidates,), low=0, high=np.inf, dtype=int), + 'time': spaces.Box(shape=(1,), low=0, high=np.inf, dtype=int), + 'time_budget': spaces.Box(shape=(1,), low=0, high=np.inf, dtype=int), + }) + + def score_document(self, doc_obs): + return 1 + +class UserSampler(user.AbstractUserSampler): + _state_parameters = {'num_candidates': num_candidates, 'time_budget': 60} + def __init__(self, + user_ctor=UserState, + **kwargs): + # self._state_parameters = {'num_candidates': num_candidates} + super(UserSampler, self).__init__(user_ctor, **kwargs) + + + def sample_user(self): + return self._user_ctor(**self._state_parameters) + +sampler = UserSampler() +# for i in range(10): +u = sampler.sample_user() +u.observation_space() + +class UserResponse(user.AbstractResponse): + def __init__(self, recall=False, pr=0): + self._recall = recall + self._pr = pr + + def create_observation(self): + return {'recall': int(self._recall), 'pr': self._pr} + + @classmethod + def response_space(cls): + # return spaces.Discrete(2) + return spaces.Dict({'recall': spaces.Discrete(2), 'pr': spaces.Box(low=0.0, high=1.0)}) + +"""# Evaluation +Calling `eval_result()` to evaluate the agent performance. This function should be outside the RecSim structure to avoid changing the training status. 
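+
+For reference, the retention model that drives both the simulated responses and this
+evaluation (implemented in `_generate_response` and `eval_result` below): each card i keeps
+a review-history vector x_i = (n, n+, n-) and per-card weights w_i (base difficulty plus a
+per-user error term), and its recall probability after Δt steps since the last review is
+pr_i = exp(-Δt / exp(w_i · x_i)). The reported score is the mean pr_i over all cards at the
+deadline.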
+ +""" + +from datetime import datetime +def eval_result(train_time, last_review, history, W): + with open(f"{datetime.now()}.txt", "w") as f: + print(train_time, file=f) + print(last_review, file=f) + print(history, file=f) + print(W, file=f) + # np.einsum('ij,ij->i', a, b) + last_review = train_time - last_review + mem_param = np.exp(np.einsum('ij,ij->i', history, W)) + pr = np.exp(-last_review / mem_param) + print(pr, file=f) + print(pr) + print("score:", np.sum(pr) / pr.shape[0], file=f) + print("score:", np.sum(pr) / pr.shape[0]) + +class FlashcardUserModel(user.AbstractUserModel): + def __init__(self, slate_size, seed=0): + super(FlashcardUserModel, self).__init__( + UserResponse, UserSampler( + UserState, seed=seed + ), slate_size) + self.choice_model = MultinomialLogitChoiceModel({}) + + def is_terminal(self): + terminated = self._user_state._time > self._user_state._time_budget + if terminated: # run evaluation process + eval_result(self._user_state._time, + self._user_state._last_review.copy(), + self._user_state._history.copy(), + self._user_state._W.copy()) + return terminated + + def update_state(self, slate_documents, responses): + for doc, response in zip(slate_documents, responses): + doc_id = doc._doc_id + self._user_state._history[doc_id][0] += 1 + if response._recall: + self._user_state._history[doc_id][1] += 1 + else: + self._user_state._history[doc_id][2] += 1 + self._user_state._last_review[doc_id] = self._user_state._time + self._user_state._time += 1 + + def simulate_response(self, slate_documents): + responses = [self._response_model_ctor() for _ in slate_documents] + # Get click from of choice model. + self.choice_model.score_documents( + self._user_state, [doc.create_observation() for doc in slate_documents]) + scores = self.choice_model.scores + selected_index = self.choice_model.choose_item() + # Populate clicked item. 
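+    # The chosen card is handed to _generate_response, which samples whether the user
+    # recalls it from the exponential forgetting curve and records the recall probability
+    # on the response.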
+ self._generate_response(slate_documents[selected_index], + responses[selected_index]) + return responses + + def _generate_response(self, doc, response): + # W = np.array([1,1,1]) + doc_id = doc._doc_id + W = self._user_state._W[doc_id] + if not W.any(): # uninitialzed + self._user_state._W[doc_id] = W = doc.base_difficulty + np.random.uniform(-1, 1, (1, 3)) # a uniform error for each user + print(W) + # use exponential function to simulate whether the user recalls + last_review = self._user_state._time - self._user_state._last_review[doc_id] + x = self._user_state._history[doc_id] + + pr = np.exp(-last_review / np.exp(np.dot(W, x))).squeeze() + print(f"time: {self._user_state._time}, reviewing flashcard {doc_id}, recall rate = {pr}") + if np.random.rand() < pr: # remembered + response._recall = True + response._pr = pr + +ltsenv = environment.Environment( + FlashcardUserModel(slate_size), + FlashcardDocumentSampler(), + num_candidates, + slate_size, + resample_documents=False) + +def reward(responses): + reward = 0.0 + for response in responses: + reward += int(response._recall) + return reward + +def update_metrics(responses, metrics, info): + # print("responses: ", responses) + prs = [] + for response in responses: + prs.append(response['pr']) + if type(metrics) != list: + metrics = [prs] + else: + metrics.append(prs) + # print(metrics) + return metrics + +observation = ltsenv.reset() +# user - history (n, n+, n-) +print("Observation space of user:") +print(u.observation_space(), '\n') +print("User history:") +print(observation[0]['history'], '\n') +# user - last review time of each card +print("User last_review:") +print(observation[0]['last_review'], '\n') +# user - current time (you can get the delta by time - last_review) +print("User time:") +print(observation[0]['time'], '\n') +# user - time bidget (deadline) +print("User time budget:") +print(observation[0]['time_budget']) + +# ltsenv.reset() +lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, reward, update_metrics) +lts_gym_env.reset() + +try_observation = lts_gym_env.reset() + +for i in range(len(try_observation['doc'])): + print(try_observation['user']['history'][i]) + +#print(try_observation['user']['history'].shape[0]) + +my_list = [10.0, 5.5, 8.1, 2.0, 1.57] +max_value = max(my_list) +print(my_list.index(max(my_list))) + +def create_agent(sess, environment, eval_mode, summary_writer=None): + kwargs = { + 'observation_space': environment.observation_space, + 'action_space': environment.action_space, + 'summary_writer': summary_writer, + 'eval_mode': eval_mode, + } + return full_slate_q_agent.FullSlateQAgent(sess, **kwargs) + +#@title Importing RecSim components +from recsim.environments import interest_evolution +from recsim.agents import full_slate_q_agent +from recsim.simulator import runner_lib + +tmp_base_dir = '/tmp/recsim/' +runner = runner_lib.TrainRunner( + base_dir=tmp_base_dir, + create_agent_fn=create_agent, + env=lts_gym_env, + episode_log_file="", + max_training_steps=5, + num_iterations=1 +) + +runner.run_experiment() + +# Commented out IPython magic to ensure Python compatibility. 
+# Load the TensorBoard notebook extension +# %load_ext tensorboard +#@title Tensorboard +# %tensorboard --logdir=/tmp/recsim/ + diff --git a/user/FlashcardUserModel.py b/user/FlashcardUserModel.py new file mode 100644 index 0000000..306598a --- /dev/null +++ b/user/FlashcardUserModel.py @@ -0,0 +1,65 @@ +from recsim import user +from recsim.choice_model import MultinomialLogitChoiceModel +from .UserState import UserState +from .UserSampler import UserSampler +from .UserResponse import UserResponse +from util import eval_result +import numpy as np + +class FlashcardUserModel(user.AbstractUserModel): + def __init__(self, num_candidates, time_budget, slate_size, seed=0): + super(FlashcardUserModel, self).__init__( + UserResponse, UserSampler( + UserState, num_candidates, time_budget, + seed=seed + ), slate_size) + self.choice_model = MultinomialLogitChoiceModel({}) + + def is_terminal(self): + terminated = self._user_state._time > self._user_state._time_budget + if terminated: # run evaluation process + eval_result(self._user_state._time, + self._user_state._last_review.copy(), + self._user_state._history.copy(), + self._user_state._W.copy()) + return terminated + + def update_state(self, slate_documents, responses): + for doc, response in zip(slate_documents, responses): + doc_id = doc._doc_id + self._user_state._history[doc_id][0] += 1 + if response._recall: + self._user_state._history[doc_id][1] += 1 + else: + self._user_state._history[doc_id][2] += 1 + self._user_state._last_review[doc_id] = self._user_state._time + self._user_state._time += 1 + + def simulate_response(self, slate_documents): + responses = [self._response_model_ctor() for _ in slate_documents] + # Get click from of choice model. + self.choice_model.score_documents( + self._user_state, [doc.create_observation() for doc in slate_documents]) + scores = self.choice_model.scores + selected_index = self.choice_model.choose_item() + # Populate clicked item. 
+ self._generate_response(slate_documents[selected_index], + responses[selected_index]) + return responses + + def _generate_response(self, doc, response): + # W = np.array([1,1,1]) + doc_id = doc._doc_id + W = self._user_state._W[doc_id] + if not W.any(): # uninitialzed + self._user_state._W[doc_id] = W = doc.base_difficulty + np.random.uniform(-0.5, 0.5, (1, 3)) # a uniform error for each user + print(W) + # use exponential function to simulate whether the user recalls + last_review = self._user_state._time - self._user_state._last_review[doc_id] + x = self._user_state._history[doc_id] + + pr = np.exp(-last_review / np.exp(np.dot(W, x))).squeeze() + print(f"time: {self._user_state._time}, reviewing flashcard {doc_id}, recall rate = {pr}") + if np.random.rand() < pr: # remembered + response._recall = True + response._pr = pr \ No newline at end of file diff --git a/user/UserResponse.py b/user/UserResponse.py new file mode 100644 index 0000000..9d6097e --- /dev/null +++ b/user/UserResponse.py @@ -0,0 +1,15 @@ +from recsim import user +from gym import spaces + +class UserResponse(user.AbstractResponse): + def __init__(self, recall=False, pr=0): + self._recall = recall + self._pr = pr + + def create_observation(self): + return {'recall': int(self._recall), 'pr': self._pr} + + @classmethod + def response_space(cls): + # return spaces.Discrete(2) + return spaces.Dict({'recall': spaces.Discrete(2), 'pr': spaces.Box(low=0.0, high=1.0)}) diff --git a/user/UserSampler.py b/user/UserSampler.py new file mode 100644 index 0000000..fc1613b --- /dev/null +++ b/user/UserSampler.py @@ -0,0 +1,15 @@ +from .UserState import UserState +from recsim import user + +class UserSampler(user.AbstractUserSampler): + def __init__(self, + user_ctor=UserState, + num_candidates=10, + time_budget=60, + **kwargs): + self._state_parameters = {'num_candidates': num_candidates, 'time_budget': time_budget} + super(UserSampler, self).__init__(user_ctor, **kwargs) + + + def sample_user(self): + return self._user_ctor(**self._state_parameters) \ No newline at end of file diff --git a/user/UserState.py b/user/UserState.py new file mode 100644 index 0000000..2d74a59 --- /dev/null +++ b/user/UserState.py @@ -0,0 +1,26 @@ +from recsim import user +import numpy as np +from gym import spaces + +class UserState(user.AbstractUserState): + def __init__(self, num_candidates, time_budget): + self._cards = num_candidates + self._history = np.zeros((num_candidates, 3)) + self._last_review = np.zeros((num_candidates,)) + self._time_budget = time_budget + self._time = 0 + self._W = np.zeros((num_candidates, 3)) + super(UserState, self).__init__() + def create_observation(self): + return {'history': self._history, 'last_review': self._last_review, 'time': self._time, 'time_budget': self._time_budget} + + def observation_space(self): # can this work? 
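+        # Note: defined as an instance method (the notebook version above is a @staticmethod
+        # using a module-level num_candidates) so the Box shapes can depend on self._cards.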
+ return spaces.Dict({ + 'history': spaces.Box(shape=(self._cards, 3), low=0, high=np.inf, dtype=int), + 'last_review': spaces.Box(shape=(self._cards,), low=0, high=np.inf, dtype=int), + 'time': spaces.Box(shape=(1,), low=0, high=np.inf, dtype=int), + 'time_budget': spaces.Box(shape=(1,), low=0, high=np.inf, dtype=int), + }) + + def score_document(self, doc_obs): + return 1 \ No newline at end of file diff --git a/user/__init__.py b/user/__init__.py new file mode 100644 index 0000000..b1c2610 --- /dev/null +++ b/user/__init__.py @@ -0,0 +1,2 @@ +from .FlashcardUserModel import FlashcardUserModel +from .UserResponse import UserResponse \ No newline at end of file diff --git a/util/__init__.py b/util/__init__.py new file mode 100644 index 0000000..fc37838 --- /dev/null +++ b/util/__init__.py @@ -0,0 +1 @@ +from util.util import * \ No newline at end of file diff --git a/util/util.py b/util/util.py new file mode 100644 index 0000000..badead3 --- /dev/null +++ b/util/util.py @@ -0,0 +1,35 @@ +from datetime import datetime +import numpy as np + +def reward(responses): + reward = 0.0 + for response in responses: + reward += int(response._recall) + return reward + +def update_metrics(responses, metrics, info): + # print("responses: ", responses) + prs = [] + for response in responses: + prs.append(response['pr']) + if type(metrics) != list: + metrics = [prs] + else: + metrics.append(prs) + # print(metrics) + return metrics + +def eval_result(train_time, last_review, history, W): + with open(f"{datetime.now()}.txt", "w") as f: + print(train_time, file=f) + print(last_review, file=f) + print(history, file=f) + print(W, file=f) + # np.einsum('ij,ij->i', a, b) + last_review = train_time - last_review + mem_param = np.exp(np.einsum('ij,ij->i', history, W)) + pr = np.exp(-last_review / mem_param) + print(pr, file=f) + print(pr) + print("score:", np.sum(pr) / pr.shape[0], file=f) + print("score:", np.sum(pr) / pr.shape[0]) \ No newline at end of file
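
For reference, a minimal standalone sketch of the retention score that `eval_result` above computes; the history, weight, and review-time arrays below are toy values made up purely for illustration.

import numpy as np

# Toy values (illustrative only): 3 cards, horizon of 10 time steps.
train_time = 10
history = np.array([[4., 3., 1.],   # per card: [n reviews, n correct, n wrong]
                    [2., 2., 0.],
                    [1., 0., 1.]])
W = np.array([[0.7, 0.9, 0.3],      # per-card weights (difficulty plus per-user error)
              [0.5, 0.8, 0.2],
              [0.6, 0.4, 0.9]])
last_review = np.array([8., 5., 9.])

delta = train_time - last_review                        # steps since each card's last review
mem_param = np.exp(np.einsum('ij,ij->i', history, W))   # per-card memory strength exp(w . x)
pr = np.exp(-delta / mem_param)                         # exponential forgetting curve
print("recall rates:", pr)
print("score:", np.sum(pr) / pr.shape[0])               # same mean score eval_result reports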