Compare commits

...

1 Commit
ucb ... master

Author SHA1 Message Date
a583855e47 fix random seeds with np.random.RandomState() 2023-10-29 23:18:48 +08:00
5 changed files with 19 additions and 12 deletions

View File

@ -2,13 +2,13 @@ from .FlashcardDocument import FlashcardDocument
from recsim import document from recsim import document
class FlashcardDocumentSampler(document.AbstractDocumentSampler): class FlashcardDocumentSampler(document.AbstractDocumentSampler):
def __init__(self, doc_ctor=FlashcardDocument, **kwargs): def __init__(self, doc_ctor=FlashcardDocument, seed=0, **kwargs):
super(FlashcardDocumentSampler, self).__init__(doc_ctor, **kwargs) super(FlashcardDocumentSampler, self).__init__(doc_ctor, seed, **kwargs)
self._doc_count = 0 self._doc_count = 0
def sample_document(self): def sample_document(self):
doc_features = {} doc_features = {}
doc_features['doc_id'] = self._doc_count doc_features['doc_id'] = self._doc_count
doc_features['difficulty'] = self._rng.uniform(0, 5, (1, 3)) doc_features['difficulty'] = self._rng.uniform(0, 3, (1, 3))
self._doc_count += 1 self._doc_count += 1
return self._doc_ctor(**doc_features) return self._doc_ctor(**doc_features)

View File

@ -17,8 +17,8 @@ tf.compat.v1.disable_eager_execution()
create_agent_fn = create_agent_helper(full_slate_q_agent.FullSlateQAgent) create_agent_fn = create_agent_helper(full_slate_q_agent.FullSlateQAgent)
ltsenv = environment.Environment( ltsenv = environment.Environment(
FlashcardUserModel(num_candidates, time_budget, slate_size), FlashcardUserModel(num_candidates, time_budget, slate_size, seed=0, sample_seed=0),
FlashcardDocumentSampler(), FlashcardDocumentSampler(seed=0),
num_candidates, num_candidates,
slate_size, slate_size,
resample_documents=False) resample_documents=False)

View File

@ -7,13 +7,14 @@ from util import eval_result
import numpy as np import numpy as np
class FlashcardUserModel(user.AbstractUserModel): class FlashcardUserModel(user.AbstractUserModel):
def __init__(self, num_candidates, time_budget, slate_size, seed=0): def __init__(self, num_candidates, time_budget, slate_size, seed=0, sample_seed=0):
super(FlashcardUserModel, self).__init__( super(FlashcardUserModel, self).__init__(
UserResponse, UserSampler( UserResponse, UserSampler(
UserState, num_candidates, time_budget, UserState, num_candidates, time_budget,
seed=seed seed=sample_seed
), slate_size) ), slate_size)
self.choice_model = MultinomialLogitChoiceModel({}) self.choice_model = MultinomialLogitChoiceModel({})
self._rng = np.random.RandomState(seed)
def is_terminal(self): def is_terminal(self):
terminated = self._user_state._time > self._user_state._time_budget terminated = self._user_state._time > self._user_state._time_budget
@ -52,7 +53,8 @@ class FlashcardUserModel(user.AbstractUserModel):
doc_id = doc._doc_id doc_id = doc._doc_id
W = self._user_state._W[doc_id] W = self._user_state._W[doc_id]
if not W.any(): # uninitialzed if not W.any(): # uninitialzed
self._user_state._W[doc_id] = W = doc.base_difficulty * np.random.uniform(0.5, 2.0, (1, 3)) # a uniform error for each user error = self._user_state._doc_error[doc_id] # a uniform error for each user
self._user_state._W[doc_id] = W = doc.base_difficulty * error
print(W) print(W)
# use exponential function to simulate whether the user recalls # use exponential function to simulate whether the user recalls
last_review = self._user_state._time - self._user_state._last_review[doc_id] last_review = self._user_state._time - self._user_state._last_review[doc_id]
@ -60,6 +62,6 @@ class FlashcardUserModel(user.AbstractUserModel):
pr = np.exp(-last_review / np.exp(np.dot(W, x))).squeeze() pr = np.exp(-last_review / np.exp(np.dot(W, x))).squeeze()
print(f"time: {self._user_state._time}, reviewing flashcard {doc_id}, recall rate = {pr}") print(f"time: {self._user_state._time}, reviewing flashcard {doc_id}, recall rate = {pr}")
if np.random.rand() < pr: # remembered if self._rng.random_sample() < pr: # remembered
response._recall = True response._recall = True
response._pr = pr response._pr = pr

View File

@ -7,9 +7,13 @@ class UserSampler(user.AbstractUserSampler):
num_candidates=10, num_candidates=10,
time_budget=60, time_budget=60,
**kwargs): **kwargs):
self._state_parameters = {'num_candidates': num_candidates, 'time_budget': time_budget}
super(UserSampler, self).__init__(user_ctor, **kwargs) super(UserSampler, self).__init__(user_ctor, **kwargs)
doc_error = self._rng.uniform(0.5, 1.5, (num_candidates, 3))
self._state_parameters = {
'num_candidates': num_candidates,
'time_budget': time_budget,
'doc_error': doc_error
}
def sample_user(self): def sample_user(self):
return self._user_ctor(**self._state_parameters) return self._user_ctor(**self._state_parameters)

View File

@ -3,13 +3,14 @@ import numpy as np
from gym import spaces from gym import spaces
class UserState(user.AbstractUserState): class UserState(user.AbstractUserState):
def __init__(self, num_candidates, time_budget): def __init__(self, num_candidates, time_budget, doc_error):
self._cards = num_candidates self._cards = num_candidates
self._history = np.zeros((num_candidates, 3)) self._history = np.zeros((num_candidates, 3))
self._last_review = np.repeat(-1.0, num_candidates) self._last_review = np.repeat(-1.0, num_candidates)
self._time_budget = time_budget self._time_budget = time_budget
self._time = 0 self._time = 0
self._W = np.zeros((num_candidates, 3)) self._W = np.zeros((num_candidates, 3))
self._doc_error = doc_error
super(UserState, self).__init__() super(UserState, self).__init__()
def create_observation(self): def create_observation(self):
return {'history': self._history, 'last_review': self._last_review, 'time': self._time, 'time_budget': self._time_budget} return {'history': self._history, 'last_review': self._last_review, 'time': self._time, 'time_budget': self._time_budget}