如何解决:在每个状态下可用操作各不相同的情况下,如何创建 OpenAI Gym 自定义环境并用 keras-rl 运行代理?
我正在尝试为我大学的一个项目创建一个自定义的 OpenAI Gym 环境。我已经制作了一个与 Q-learning 一起使用的自定义环境,但是我在使用来自 keras-rl 的代理 (DQNAgent) 进行训练时遇到了问题。经过一些研究并尝试了很多方法(甚至写了我自己的代理),我得出的结论是:主要问题是每个状态下可用的操作各不相同,这使得我的环境与代理不兼容。
我的想法是让我的 action_space 包含所有可能的动作,如果选择了无效的动作,则用负奖励“惩罚”我的代理。但是有更好的解决方案吗?
这是我必须实现的环境。
请注意,中间状态(SA1,SA2,...)的数量不是固定的。最多可以有 n 个中间状态。
我制作的自定义环境是这样的:
import networkx as nx
import matplotlib.pyplot as plt
import random
#classes for the linked list
class Node:
    """A single environment state together with its outgoing transitions."""

    def __init__(self, state):
        # Name/identifier of this state (e.g. "Sn", "Sd", "Sa1").
        self.state = state
        # Outgoing edges: list of (Node, reward) tuples reachable from here.
        self.nextStates = []
class LinkedList:
    """Minimal linked structure: only tracks the entry (head) node.

    The head is assigned externally (by RL_Module) after construction.
    """

    def __init__(self):
        self.head = None
def getState_number(s, n):
    """Map a state name to its integer index.

    s -- state name: "Sn", "Sd", "Se", or a middle state "SaX".
    n -- total number of middle states.

    Numbering: Sn -> 0, Sd -> 1, SaX -> X + 1, Se -> n + 2.
    """
    fixed = {"Sn": 0, "Sd": 1, "Se": n + 2}
    if s in fixed:
        return fixed[s]
    # Middle state "SaX": strip the "Sa" prefix and shift past Sn/Sd.
    return int(s[2:]) + 1
class RL_Module():
    """Custom RL environment over a small state graph.

    Three fixed states -- Sn (stable), Sd (decision), Se (failure) -- plus a
    variable number of middle states Sa1..San, stored as linked Node objects.
    Each transition carries a reward; the action space therefore differs per
    state (it is the index into the current node's nextStates list).
    """

    def __init__(self, states, cost):
        """states -- list of 3-tuples per middle state Sai:
                     (reward Sd->Sai, reward Sai->Se, reward Sai->Sn).
        cost   -- reward applied on the Se->Sn (recovery) transition.
        """
        # networkx graph is used only for visualisation (render/plot_frame).
        self.G = nx.Graph()
        self.G.add_edges_from([("Sn", "Sd"), ("Se", "Sn"), ("Sd", "Se")])
        self.nodes = ["Sn", "Sd", "Se"]
        self.edges = [("Sn", "Se")]
        # Linked structure holding the actual environment states.
        self.start = LinkedList()
        # The 3 constant states.
        self.start.head = Node("Sn")  # stable state
        self.failure = Node("Se")     # failure state
        self.decision = Node("Sd")    # state where a decision is taken
        self.state = self.start.head
        self.reward = 0
        # Number of middle states.
        self.middle_states = len(states)
        self.start.head.nextStates.append((self.decision, 0))
        self.decision.nextStates.append((self.failure, 0))
        self.failure.nextStates.append((self.start.head, cost))
        # Append the middle states Sa1..San.
        i = 1
        for s in states:
            name = "Sa" + str(i)
            temp = Node(name)
            temp.nextStates.append((self.failure, s[1]))
            temp.nextStates.append((self.start.head, s[2]))
            self.decision.nextStates.append((temp, s[0]))
            i = i + 1
            # Graph bookkeeping for visualisation.
            self.nodes.append(name)
            self.G.add_edges_from([("Se", name), ("Sn", name)])
            self.edges.extend([("Se", name)])

    def step(self, action):
        """Take `action` (index into the current state's nextStates).

        Returns (state_number, reward, done, info); done is True when the
        environment is back in the stable state Sn.
        """
        done = False
        current = self.state
        # `nxt` avoids shadowing the builtin `next`.
        nxt = current.nextStates[action]
        s_next = nxt[0]
        reward = nxt[1]
        self.state = s_next
        state = getState_number(self.state.state, self.middle_states)
        if self.state.state == "Sn":
            done = True
        return (state, reward, done, "all good")

    def action_space_n(self):
        """Valid action indices from the current state."""
        return list(range(len(self.state.nextStates)))

    def action_space(self):
        """Raw (Node, reward) transitions available from the current state."""
        return self.state.nextStates

    def reset(self):
        """Return to the stable state Sn; its state number is 0."""
        self.state = self.start.head
        return 0

    # -- visualisation helpers (not needed for training) --
    def render(self):
        """Plot the state graph, highlighting the current state in red."""
        G = self.G
        pos = nx.circular_layout(G)
        nx.draw_networkx_nodes(G, pos, node_size=700, nodelist=self.nodes)
        # Fix: networkx drawing functions require the `pos` argument;
        # the original calls omitted it and raised a TypeError.
        nx.draw_networkx_labels(G, pos, font_size=20, font_family='sans-serif')
        nx.draw_networkx_edges(G, pos, edgelist=self.edges, width=6)
        nx.draw_networkx_nodes(G, pos, node_size=1400,
                               nodelist=[self.state.state], node_color='red')
        nx.draw_networkx_edge_labels(G, pos, edge_labels={('Se', 'Sn'): 'Hi'},
                                     font_color='red')
        plt.axis('off')
        plt.show()

    def plot_frame(self):
        """Draw the graph onto the current figure and return pyplot."""
        G = self.G
        pos = nx.circular_layout(G)
        # Fix: `pos` is required and `font_color` is not a valid kwarg
        # of draw_networkx_nodes.
        nx.draw_networkx_nodes(G, pos, nodelist=self.nodes)
        plt.axis('off')
        return plt

    def random_step(self):
        """Take a uniformly random valid action from the current state.

        Returns (state_number, done, info). Fixes the original, which
        referenced undefined names (`s_next`, `state`) and never advanced
        the state.
        """
        current = self.state
        print(getState_number(current.state, self.middle_states))
        chosen = random.choice(current.nextStates)
        s_next = chosen[0]
        print("s:", current.state)
        print("next:", s_next.state)
        self.state = s_next
        state = getState_number(s_next.state, self.middle_states)
        print(state)
        done = self.state.state == "Sn"
        return (state, done, "all good")
networkx 只是为了可视化环境。
你可以运行如下环境:
model = RL_Module([(-1,-3,-5),(0,-5,0)],-10) # each 3-tuple is one middle state: (reward to Sai, reward to Se, reward to Sn)
model.render()
step = model.random_step()
#or .step() given a valid action
model.step(action)
正如我所说,我是 RL 的新手,因此非常感谢对我的代码或链接的任何评论,以帮助我更好地理解某些内容!谢谢!
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。