diff --git a/agent/__init__.py b/agent/__init__.py
index 8016aed..c816552 100644
--- a/agent/__init__.py
+++ b/agent/__init__.py
@@ -1 +1,2 @@
-from .util import create_agent_helper
\ No newline at end of file
+from .util import create_agent_helper
+from .greedy import GreedyAgent
\ No newline at end of file
diff --git a/agent/greedy.py b/agent/greedy.py
new file mode 100644
index 0000000..201cff2
--- /dev/null
+++ b/agent/greedy.py
@@ -0,0 +1,69 @@
+from recsim.agent import AbstractEpisodicRecommenderAgent
+import numpy as np
+
+
+class GreedyAgent(AbstractEpisodicRecommenderAgent):
+    """Greedily recommends the single document whose review yields the
+    largest expected gain in total recall probability."""
+
+    def __init__(self, sess, observation_space, action_space, eval_mode,
+                 summary_writer):
+        super(GreedyAgent, self).__init__(action_space, summary_writer)
+        self._num_candidates = int(action_space.nvec[0])
+        # Default memory weights; overwritten in begin_episode when the
+        # true W is observable.
+        self._W = np.array([[3, 1.5, 0.5]] * self._num_candidates)
+        assert self._slate_size == 1
+
+    def begin_episode(self, observation=None):
+        user = observation['user']
+        docs = observation['doc']
+        if 'W' in user:  # use the observable W
+            self._W = user['W']
+        else:  # otherwise recover W from the document observations
+            w = []
+            for doc_id in docs:
+                w.append(docs[doc_id])
+            self._W = np.array(w).reshape((-1, 3))
+        print("agent W:", self._W)
+        self._episode_num += 1
+        return self.step(0, observation)
+
+    def step(self, reward, observation):
+        docs = observation['doc']
+        user = observation['user']
+
+        base_pr = self.calc_prs(user['time'], user['last_review'],
+                                user['history'], self._W)
+        max_pr = -self._num_candidates
+        max_id = 0
+        for did in docs:
+            doc_id = int(did)
+            last_review = user['last_review'].copy()
+            history = user['history'].copy()
+            last_review[doc_id] = user['time']
+            time = user['time'] + 1
+
+            # History columns are [reviews, successes, failures]; average
+            # the recall probabilities over both possible review outcomes.
+            history[doc_id][0] += 1
+            history[doc_id][1] += 1
+            pr1 = self.calc_prs(time, last_review, history, self._W)
+            history[doc_id][1] -= 1
+            history[doc_id][2] += 1
+            pr2 = self.calc_prs(time, last_review, history, self._W)
+            pr = (pr1 + pr2) / 2 - base_pr
+            sum_pr = np.sum(pr)
+            if sum_pr > max_pr:
+                max_pr = sum_pr
+                max_id = doc_id
+        print(f"choose doc{max_id} with marginal gain {max_pr}")
+        return [max_id]
+
+    def calc_prs(self, train_time, last_review, history, W):
+        # Exponential forgetting curve: pr_i = exp(-delta_i / exp(h_i . w_i)),
+        # where delta_i is the time since document i was last reviewed.
+        last_review = train_time - last_review
+        mem_param = np.exp(np.einsum('ij,ij->i', history, W))
+        pr = np.exp(-last_review / mem_param)
+        return pr
\ No newline at end of file
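
For reviewers: the agent scores each candidate with an exponential forgetting-curve memory model. Below is a minimal, self-contained numpy sketch of that scoring; the `[reviews, successes, failures]` history encoding and all example numbers are illustrative assumptions, not values taken from the environment.

```python
import numpy as np

# Illustrative only: three documents with assumed per-document weights W and
# history columns [reviews, successes, failures].
W = np.array([[3.0, 1.5, 0.5]] * 3)
history = np.array([[2.0, 2.0, 0.0],   # reviewed twice, recalled both times
                    [1.0, 0.0, 1.0],   # reviewed once, forgotten
                    [0.0, 0.0, 0.0]])  # never reviewed
last_review = np.array([3.0, 3.0, 0.0])  # time each doc was last reviewed
now = 5.0

delta = now - last_review                              # time since last review
half_life = np.exp(np.einsum('ij,ij->i', history, W))  # memory strength
pr = np.exp(-delta / half_life)                        # recall probability
print(pr)  # successful reviews lengthen the half-life, so doc 0 scores highest
```

In `step`, the agent simulates one review of each candidate, averages this probability over the success and failure outcomes, and recommends the candidate with the largest summed gain over `base_pr`.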
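And a sketch of how the new agent might be exercised end to end. Only `runner_lib.TrainRunner` and the `create_agent` signature follow RecSim's documented API; `make_env()` is a hypothetical stand-in for whatever recsim_gym environment this repo pairs with the agent.

```python
from recsim.simulator import runner_lib

from agent import GreedyAgent


def create_agent(sess, environment, eval_mode, summary_writer=None):
    # RecSim's create_agent_fn contract; sess and eval_mode are accepted but
    # unused by GreedyAgent itself.
    return GreedyAgent(sess, environment.observation_space,
                       environment.action_space, eval_mode, summary_writer)


runner = runner_lib.TrainRunner(
    base_dir='/tmp/greedy_agent',
    create_agent_fn=create_agent,
    env=make_env(),  # hypothetical: this repo's recsim_gym environment
    max_training_steps=50,
    num_iterations=10)
runner.run_experiment()
```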