如何解决Q-learning中的环境
我需要你的帮助。我尝试在 Q-Learning 中创建一个由两个公差范围和参考曲线组成的环境。下面是我的算法,但它给了我这个错误(IndexError:无法从空序列中选择)。所以我不知道发生了什么。感谢您的帮助
类游戏: Xref=file[:,0] # 0 对应 à la 1ère Colonne etc Yref=file[:,1]/3.6 # (tr/min)
Xmin=[0]
Ymin=[0]
for i in range(len(Xref)-1):
Xmin.append(Xref[i]-2)
Ymin.append(Yref[i]-2)
Xmin=np.array(Xmin)
Ymin=np.array(Ymin)
Xmax=[0]
Ymax=[0]
for i in range(len(Xref)-1):
Xmax.append(Xref[i]+2)
Ymax.append(Yref[i]+2)
Xmax=np.array(Xmax)
Ymax=np.array(Ymax)
k=np.linspace(1,1,1801)
Xn=[0]
for i in range(len(Xref)-1):
if Xref[i] < Xmax[i]:
Xn.append(Xref[i] + 2**k)
else:
Xn.append(Xref[i] - 2**k)
Yn=[0]
for i in range(len(Yref)-1):
if Yref[i] < Ymax[i]:
Yn.append(Yref[i] + 2**k)
else:
Yn.append(Yref[i] - 2**k)
Actions = [Xref,Yref]
Act_names = ["UP","LEFT"]
Mouvements = {len(Xref),len(Yref)}
num_actions = len(Actions)
# Fonction méthode
def __init__(self,speed,time,wrong_act_p=0.1,alea=False): # https://micropyramid.com/blog/understand-self-and-__init__-method-in-python-class/
self.speed = speed
self.time = time
self.wrong_act_p = wrong_act_p
self.alea = alea
self.generate_game()
def _position_to_id(self,x,y):
"""Donne l'identifiant de la position entre 0 et 15"""
return x+y*self.speed
def _id_to_position(self,id): # https://fr.wikipedia.org/wiki/Python_(langage)
"""Réciproque de la fonction précédente"""
return (id % self.speed,id // self.speed)
def generate_game(self):
cases = [(x,y) for x in range(self.speed) for y in range(self.time)]
start = random.choice(cases)
cases.remove(start)
end = random.choice(cases)
cases.remove(end)
self.position = start
self.end = end
self.counter = 0
if not self.alea:
self.start = start
return self._get_state()
def reset(self):
if not self.alea:
self.position = self.start
self.counter = 0
return self._get_state()
else:
return self.generate_game()
# Définition de la surface de la grille
def _get_grille(self,y):
grille = [
[0] * self.speed for i in range(self.time)
]
grille[x][y] = 1
return grille
def _get_state(self):
if self.alea:
return [self._get_grille(x,y) for (x,y) in
[self.position,self.end]]
return self._position_to_id(*self.position)
def move(self,action):
"""
takes an action parameter
:param action : the id of an action
:return ((state_id,end,hole,block),reward,is_final,actions)
"""
self.counter += 1 # Ajoute une valeur et la variable et attribue le résultat à cette variable.
if action not in self.Actions:
raise Exception ("Action invalide") # Le raise mot-clé est utilisé pour lever une exception.
# random actions sometimes (2 times over 10 default)
choice = random.random() # Renvoie le nombre aléatoire à virgule flottante suivant dans la plage [0.0,1.0). https://docs.python.org/fr/3/library/random.html
if choice < self.wrong_act_p:
action = (action + 1) % 4
elif choice < 2 * self.wrong_act_p:
action = (action- 1) % 4
d_x,d_y = self.Mouvements[action]
x,y = self.position
new_x,new_y = x + d_x,y + d_y
if self.end == (new_x,new_y):
self.position = new_x,new_y
return self._get_state(),10,True,self.Actions
elif new_x >= self.speed or new_y >= self.m or new_x < 0 or new_y < 0:
return self._get_state(),-1,False,self.Actions
elif self.counter > 190:
self.position = new_x,-10,self.Actions
else:
self.position = new_x,self.Actions
def print(self):
str = ""
for i in range (self.speed - 1,-1):
for j in range (self.time):
if (i,j) == self.position:
str += "x"
elif (i,j) == self.end:
str += "@"
else:
str += "."
str += "\n"
print(str)
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。