如何解决索引 1 超出维度 0 和大小 1 的范围
我开始学习DQN
,我正在尝试自己从头开始解决FrozenLake-v0
问题并使用Pytorch
,所以我将把整个代码,因为它是连接的.
class LinearDeepQNetwork(nn.Module):
def __init__(self,lr,n_action,input_dim):
super(LinearDeepQNetwork,self).__init__()
self.f1=nn.Linear(input_dim,128)
self.f2=nn.Linear(128,n_action)
self.optimizer=optim.Adam(self.parameters(),lr=lr)
self.loss=nn.MSELoss()
self.device=T.device('cuda' if T.cuda.is_available() else 'cpu')
self.to(self.device)
def forward(self,state):
layer1=F.relu(self.f1(state))
actions=self.f2(layer1)
return actions
第二类是agent,问题出在学习函数上
nd class是agent,问题出在学习函数上
class Agent():
def __init__(self,input_dim,gamma=0.99,epslion=1.0,eps_dec=1e-5,eps_min=0.01):
self.input_dim=input_dim
self.n_action=n_action
self.lr=lr
self.gamma=gamma
self.epslion=epslion
self.eps_dec=eps_dec
self.eps_min=eps_min
self.action_space=[i for i in range(self.n_action)]
self.Q=LinearDeepQNetwork(self.lr,self.n_action,self.input_dim)
def choose_action(self,observation):
if np.random.random()>self.epslion:
#conveate the state into tensor
state=T.tensor(observation).to(self.Q.device)
actions=self.Q.forward(state)
action=T.argmax(actions).item()
else:
action=np.random.choice(self.action_space)
return action
def decrement_epsilon(self):
self.epslion=self.epslion-self.eps_dec \
if self.epslion > self.eps_min else self.eps_min
def OH(self,x,l):
x = T.LongTensor([[x]])
one_hot = T.FloatTensor(1,l)
return one_hot.zero_().scatter_(1,1)
def learn(self,state,action,reward,state_):
self.Q.optimizer.zero_grad()
states=Variable(self.OH(state,16)).to(self.Q.device)
actions=T.tensor(action).to(self.Q.device)
rewards=T.tensor(reward).to(self.Q.device)
state_s=Variable(self.OH(state_,16)).to(self.Q.device)
q_pred=self.Q.forward(states)[actions]
q_next=self.Q.forward(state_s).max()
q_target=reward+self.gamma*q_next
loss=self.Q.loss(q_target,q_pred).to(self.Q.device)
loss.backward()
self.Q.optimizer.step()
self.decrement_epsilon()
现在,当我运行以下代码时出现问题,它在学习阶段给我一个错误,它给了我这个错误 index 1 is out of bounds for dimension 0 with size 1.
env=gym.make('FrozenLake-v0')
n_games=5000
scores=[]
eps_history=[]
agent=Agent(env.observation_space.n,env.action_space.n,0.0001)
for i in tqdm(range(n_games)):
score=0
done=False
obs=env.reset()
while not done:
action=agent.choose_action(obs)
obs_,done,_=env.step(action)
score+=reward
agent.learn(obs,obs_)
obs=obs_
scores.append(score)
eps_history.append(agent.epslion)
if i % 100 ==0:
avg_score=np.mean(scores[-100:])
print(f'score={score} avg_score={avg_score} epsilon={agent.epslion} i={i}')
我认为问题出在 NN 和代理类之间的值的形状上,但我无法弄清楚问题所在。
错误回溯:
IndexError Traceback (most recent call last)
<ipython-input-10-2e279f658721> in <module>()
17 score+=reward
18
---> 19 agent.learn(obs,obs_)
20 obs=obs_
21 scores.append(score)
<ipython-input-8-5359b19ec4fa> in learn(self,state_)
39 state_s=Variable(self.OH(state_,16)).to(self.Q.device)
40
---> 41 q_pred=self.Q.forward(states)[actions]
42
43 q_next=self.Q.forward(state_s).max()
IndexError: index 1 is out of bounds for dimension 0 with size 1
解决方法
由于您正在调用包含矩阵的张量,因此您需要指定在您的情况下调用的索引,只需将 import { AClass } from './AClass';
const x = new AClass();
x.function1('Param X');
添加到 forward 语句即可解决问题,并在 [0]
中替换它与 [actions]
[actions.item()]
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。