Okay, I'm trying to build an intrinsically curious agent with Keras and TensorFlow. The agent's reward is the difference between the autoencoder's loss on the transition from the previous state to the current state and its loss on the transition from the current state to the imagined next state. However, the reward function always returns None instead of the actual difference. I've tried printing the losses, and they always show correct values. Any ideas?
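In other words, the per-step reward I want is the gap between the two reconstruction losses (variable names as in the code below):

    reward = loss_ae_ns - loss_ae_ps  # imagined-transition loss minus observed-transition loss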
Reward function / replay code:
# Assumes "import random as R" and "import numpy as np" at module level.
def replay(self, batch):
    minibatch = R.sample(self.memory, batch)
    for prev_state, actions, state, reward, imagined_next_state in minibatch:
        # Perturb the imagined next state so the autoencoder target differs from its input.
        imagined_next_state = np.add(np.random.random(self.state_size), imagined_next_state)
        target_m = self.model.predict(state)
        for i in range(len(target_m)):
            target_m[i][0][actions[i]] = reward
        history_m = self.model.fit(state, target_m, epochs=1, verbose=0)
        # fit() returns a History object; history['loss'] holds one value per epoch.
        history_ae_ps = self.autoencoder.fit(prev_state, state, epochs=1, verbose=0)
        history_ae_ns = self.autoencoder.fit(state, imagined_next_state, epochs=1, verbose=0)
        loss_m = history_m.history['loss'][-1]
        loss_ae_ps = history_ae_ps.history['loss'][-1]
        loss_ae_ns = history_ae_ns.history['loss'][-1]
        print("LOSS AE PS:", loss_ae_ps)
        print("LOSS AE NS:", loss_ae_ns)
        # Curiosity reward: reconstruction-loss gap between the imagined and observed transitions.
        loss_ae = loss_ae_ns - loss_ae_ps
        print(reward, loss_ae)
        # Note: this returns after the first sampled transition.
        return loss_ae
Agent-environment loop code:
def loop(self, times='inf'):
    # "is" checks object identity, not equality; compare strings with ==.
    if times == 'inf':
        times = 2**31
    reward = 0.0001
    prev_shot = self.get_shot()
    for _ in range(times):
        acts, ins, act_probs, shot = self.get_act()
        act_0, act_1, act_2, act_3 = acts
        self.act_to_mouse(act_0, act_1)
        self.act_to_click(act_2)
        self.act_to_keys(act_3)
        reward = self.remember_and_replay(prev_shot, acts, shot, reward, ins)
        if reward is None:
            raise RewardError("Rewards are none.")
        prev_shot = shot
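RewardError is my own exception, not a built-in; a minimal definition is just:

    class RewardError(Exception):
        """Raised when remember_and_replay returns None instead of a reward."""
        pass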
I figured it out while writing up the question: I simply wasn't returning the reward from the remember_and_replay method...
The remember_and_replay method looked roughly like this (a sketch; self.batch_size stands in for whatever batch size I actually pass):
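    def remember_and_replay(self, prev_state, actions, state, reward, imagined_next_state):
        self.memory.append((prev_state, actions, state, reward, imagined_next_state))
        # The result of replay() is thrown away, so this method implicitly returns None.
        self.replay(self.batch_size)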
when it should have been this:
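    def remember_and_replay(self, prev_state, actions, state, reward, imagined_next_state):
        self.memory.append((prev_state, actions, state, reward, imagined_next_state))
        # Returning replay()'s result is the whole fix.
        return self.replay(self.batch_size)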
Hope this helps someone. :)