Python gym 模块,upload() 实例源码
我们从Python开源项目中,提取了以下32个代码示例,用于说明如何使用gym.upload()。
def upload():
    """
    Upload the results of training (as automatically recorded by
    your env's monitor) to OpenAI Gym.

    Parameters:
        - training_dir: A directory containing the results of a
          training run.
        - api_key: Your OpenAI API key
        - algorithm_id (default=None): An arbitrary string
          indicating the particular version of the algorithm
          (including choices of parameters) you are running.
    """
    j = request.get_json()
    training_dir = get_required_param(j, 'training_dir')
    api_key = get_required_param(j, 'api_key')
    algorithm_id = get_optional_param(j, 'algorithm_id', None)
    try:
        gym.upload(training_dir, algorithm_id, writeup=None, api_key=api_key,
                   ignore_open_monitors=False)
        # 204: success with no response body.
        return ('', 204)
    except gym.error.AuthenticationError:
        # Surface a bad/missing key as a client error rather than a 500.
        raise InvalidUsage('You must provide an OpenAI Gym API key')
def run(submit_key, gpu):
    """Play episodes with a trained DQN agent; record and upload when a
    submission key is supplied."""
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=0.01, model_path=PATH, on_gpu=gpu)
    record_path = ""
    n_episodes = 5
    if submit_key:
        # Recording mode: write monitor output next to this script.
        print("make directory to submit result")
        record_path = os.path.join(os.path.dirname(__file__), "submit")
        n_episodes = 100
    plays = env.play(agent, episode=n_episodes, render=True,
                     action_interval=4, record_path=record_path)
    for _ep, _step, _reward in plays:
        pass
    if submit_key:
        gym.upload(record_path, api_key=submit_key)
def upload_results(folder):
    """Submit the monitor output stored in *folder* to the Gym scoreboard,
    authenticating with the key from the local secrets module."""
    gym.upload(folder, api_key=secrets.api_key)
def close(self):
    """Flush all recorded monitor data to disk and shut down any open
    rendering windows."""
    # A monitor that was never enabled (or is already closed) has nothing
    # to flush.
    if not self.enabled:
        return
    self.stats_recorder.close()
    if self.video_recorder is not None:
        self._close_video_recorder()
    self._flush(force=True)
    # Remove ourselves from the auto-close registry.
    monitor_closer.unregister(self._monitor_id)
    self.enabled = False
    logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
def test(self, model_name):
    """Load a saved model and roll out evaluation paths; in movie-recording
    mode, close the monitor and optionally upload to the Gym scoreboard."""
    self.load_model(model_name)
    if pms.record_movie:
        for _ in range(100):
            self.storage.get_single_path()
        self.env.env.monitor.close()
        if pms.upload_to_gym:
            # NOTE(review): the API key is hard-coded in source; it should
            # live in configuration instead.
            gym.upload("log/trpo", algorithm_id='alg_8BgjkAsQRNiWu11xAhS4Hg',
                       api_key='sk_IJhy3b2QkqL3LWzgBXoVA')
    else:
        for _ in range(50):
            self.storage.get_single_path()
def test(self, model_name):
    """Evaluate a restored model by sampling rollout paths.

    With movie recording on, 100 paths are sampled, the monitor is closed,
    and the results may be uploaded to the scoreboard; otherwise 50 paths
    are sampled without recording.
    """
    self.load_model(model_name)
    if not pms.record_movie:
        for i in range(50):
            self.storage.get_single_path()
        return
    for i in range(100):
        self.storage.get_single_path()
    self.env.env.monitor.close()
    if pms.upload_to_gym:
        # NOTE(review): hard-coded credentials; move to configuration.
        gym.upload("log/trpo", algorithm_id='alg_8BgjkAsQRNiWu11xAhS4Hg',
                   api_key='sk_IJhy3b2QkqL3LWzgBXoVA')
def close(self):
    """Write out all pending monitor data and release rendering resources.

    Safe to call more than once: a disabled monitor returns immediately.
    """
    if not self.enabled:
        return
    self.stats_recorder.close()
    if self.video_recorder is not None:
        self._close_video_recorder()
    self._flush(force=True)
    # Stop tracking this monitor for automatic closing.
    monitor_closer.unregister(self._monitor_id)
    self.enabled = False
    logger.info('''Finished writing results. You can upload them to the scoreboard via gym.upload(%r)''', self.directory)
def Upload():
    """Push the recorded training session to the OpenAI Gym scoreboard."""
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def main():
    """Dispatch to train / test / upload according to the -m/--mode flag."""
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', help = 'train / test / upload', required = True)
    args = vars(parser.parse_args())
    mode = args['mode']
    if mode == 'train':
        Train_Model(env = env)
    elif mode == 'test':
        Test_Model(env = env)
    elif mode == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def Upload():
    # Submit the recorded training run under the configured API key.
    record_path = RECORD_DIR + RECORD_FILENAME
    gym.upload(record_path, api_key=API_KEY)
def main():
    """Parse the mode flag and dispatch to train / test / upload.

    Bug fix: the flag was registered as ``-m`` only, so argparse stored it
    under the dest ``m`` while the code below reads ``args['mode']`` —
    a guaranteed ``KeyError``. Adding the long option ``--mode`` makes the
    dest ``mode`` while keeping ``-m`` valid on the command line.
    """
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', required = True)
    args = vars(parser.parse_args())
    if args['mode'] == 'train':
        Train_Model(env = env)
    elif args['mode'] == 'test':
        Test_Model(env = env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def Upload():
    """Upload the recorded training result to the Gym scoreboard."""
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def Upload():
    # Send the recorded training data to the scoreboard.
    target = RECORD_DIR + RECORD_FILENAME
    gym.upload(target, api_key=API_KEY)
def main():
    """Command-line entry point: train, test, or upload.

    Bug fix: ``add_argument('-m')`` alone gives the argument dest ``m``,
    but the lookups below use ``args['mode']`` and would raise ``KeyError``.
    Registering ``--mode`` as well fixes the dest without breaking ``-m``.
    """
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', required = True)
    args = vars(parser.parse_args())
    if args['mode'] == 'train':
        Train_Model(env = env)
    elif args['mode'] == 'test':
        Test_Model(env = env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def Upload():
    """Submit the recorded run (RECORD_DIR + RECORD_FILENAME) to Gym."""
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def main():
    """Dispatch to the selected AI mode (train / test / upload).

    Bug fix: with only the short flag ``-m`` registered, argparse stores the
    value under ``m``; ``args['mode']`` therefore raised ``KeyError``. The
    long option ``--mode`` sets the expected dest.
    """
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', required = True)
    args = vars(parser.parse_args())
    if args['mode'] == 'train':
        Train_Model(env = env)
    elif args['mode'] == 'test':
        Test_Model(env = env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def Upload():
    # Upload the recorded training session to the scoreboard.
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def Upload():
    """Send the stored training record to OpenAI Gym."""
    path = RECORD_DIR + RECORD_FILENAME
    gym.upload(path, api_key=API_KEY)
def main():
    """Entry point: pick train / test / upload from the command line.

    Bug fix: registering only ``-m`` makes argparse use dest ``m``, so the
    ``args['mode']`` reads below raised ``KeyError``; ``--mode`` restores
    the intended dest while ``-m`` keeps working.
    """
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', required = True)
    args = vars(parser.parse_args())
    if args['mode'] == 'train':
        Train_Model(env = env)
    elif args['mode'] == 'test':
        Test_Model(env = env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def Upload():
    """Upload the training record to the Gym scoreboard."""
    gym.upload(RECORD_DIR + RECORD_FILENAME, api_key=API_KEY)
def main():
    """Select and run an AI mode from the command line.

    Bug fix: ``-m`` alone yields dest ``m``; the code reads ``args['mode']``
    and crashed with ``KeyError``. Adding ``--mode`` fixes the dest.
    """
    parser = argparse.ArgumentParser(description = 'Designate AI mode')
    parser.add_argument('-m', '--mode', required = True)
    args = vars(parser.parse_args())
    if args['mode'] == 'train':
        Train_Model(env = env)
    elif args['mode'] == 'test':
        Test_Model(env = env)
    elif args['mode'] == 'upload':
        Upload()
    else:
        print('Please designate AI mode.')
def handleUpload(self):
    """Upload the monitor directory named in the environment entry box."""
    env_name = self.envEntry.get()
    # NOTE(review): API key is hard-coded; consider reading it from config.
    gym.upload('tmp/' + env_name, api_key="sk_8j3LQ561SH20sk0YN3qpg")
def run(config_file):
    """Evolve a NEAT population, then evaluate the winner on MountainCar-v0.

    Bug fix: the ``config_file`` argument was ignored — the hard-coded file
    name ``'config'`` was always loaded. The parameter is now honored;
    existing callers that pass ``'config'`` see identical behavior.
    """
    local_dir = os.path.dirname(__file__)
    config_path = os.path.join(local_dir, config_file)
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)
    # Create the population, which is the top-level object for a NEAT run.
    population = neat.Population(config)
    population.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    population.add_reporter(stats)
    # Checkpoint every 5 generations.
    population.add_reporter(neat.Checkpointer(5))
    # Run the evolution for the configured number of generations.
    winner = population.run(eval_genomes, generations)
    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))
    # Show output of the most fit genome against training data.
    winner_net = neat.nn.FeedForwardNetwork.create(winner, config)
    scores = []
    # Create the environment for the test and wrap it with a Monitor.
    env = gym.make('MountainCar-v0')
    #env = wrappers.Monitor(env,'tmp/MountainCar-v0')
    for i in range(trials):
        score = 0
        observation = env.reset()
        for _ in range(goal_steps):
            action = np.argmax(winner_net.activate(observation))
            # do it!
            observation, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        scores.append(score)
    print("The winning neural network obtained an average score of: " + str(np.average(scores)))
    # MountainCar-v0 is considered solved around an average of -110.
    if np.average(scores) > -110:
        gym.upload('tmp/MountainCar-v0', api_key='sk_tiwKaUHVQDChjmO9JmK2Gg')
    # Resume from an earlier checkpoint and continue for 10 generations.
    p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-4')
    p.run(eval_genomes, 10)
def run(self,
        epochs,
        steps,
        api_key,
        rollouts_per_epoch = 100,
        updateTargetNetwork = defaultRunSettings['updateTargetNetwork'],
        explorationRate = defaultRunSettings['explorationRate'],
        miniBatchSize = defaultRunSettings['miniBatchSize'],
        learnStart = defaultRunSettings['learnStart'],
        renderPerXEpochs = defaultRunSettings['renderPerXEpochs'],
        shouldRender = defaultRunSettings['shouldRender'],
        experimentId = defaultRunSettings['experimentId'],
        force = defaultRunSettings['force'],
        upload = defaultRunSettings['upload']):
    """Collect rollouts for `epochs` epochs, learn from each batch, and
    optionally upload the monitor output to the Gym scoreboard.

    Bug fix: ``env.step`` returns ``(observation, reward, done, info)``;
    the original unpacked only two names, leaving ``reward`` and ``done``
    undefined (NameError on first use below).
    """
    last100scores = [0] * 100
    last100scoresIndex = 0
    last100Filled = False
    if experimentId != None:
        self.env.monitor.start('tmp/'+experimentId, force = force)
    for epoch in xrange(epochs):
        paths = []
        for rollout in xrange(rollouts_per_epoch):
            # One path = the trajectory of a single rollout.
            path = {}
            path["actions"] = []
            path["rewards"] = []
            path["states"] = []
            path["isDone"] = []
            observation = self.env.reset()
            # number of timesteps
            totalReward = 0
            for t in xrange(steps):
                policyValues = self.runModel(self.policyModel, observation)
                action = self.selectActionByProbability(policyValues)
                # action = self.selectActionByProbability(self.convertToProbabilities(policyValues))
                path["states"].append(observation)
                path["actions"].append(action)
                # Fixed: unpack all four values returned by env.step().
                newObservation, reward, done, info = self.env.step(action)
                path["rewards"].append(reward)
                path["isDone"].append(done)
                totalReward += reward
                observation = newObservation
                if done:
                    break
            paths.append(path)
        self.learn(paths)
    self.env.monitor.close()
    if upload:
        # NOTE(review): monitor writes to 'tmp/<id>' but the upload reads
        # '/tmp/<id>' — likely a pre-existing path mismatch; confirm.
        gym.upload('/tmp/'+experimentId, api_key=api_key)
# NOTE(review): this snippet was mangled by the page extraction. The
# signature lost most of its parameters (`epochs`, `steps`, `api_key`,
# `experimentId`, `force` are all referenced below but never bound), and
# `reward` / `done` are used without being assigned — presumably the
# env.step() line originally unpacked four values. Kept verbatim with
# review notes only; not runnable as-is.
def run(self,
rollouts_per_epoch = 20,
upload = defaultRunSettings['upload']):
last100scores = [0] * 100
last100scoresIndex = 0
last100Filled = False
stepCounter = 0
# NOTE(review): `experimentId` and `force` are undefined in this excerpt.
if not experimentId == None:
self.env.monitor.start('tmp/'+experimentId, force = force)
# NOTE(review): `epochs` / `steps` are undefined in this excerpt.
for epoch in xrange(epochs):
I = 1
observation = self.env.reset();
for t in xrange(steps):
policyValues = self.runModel(self.policyModel, observation)
action = self.selectActionByProbability(policyValues)
# NOTE(review): presumably `newObservation, reward, done, info = ...` — confirm.
newObservation, info = self.env.step(action)
cost, grads = self.get_cost_grads(self.policyModel);
print (theano.pp(grads[1][0]));
# Actor-critic TD error: bootstrapped when the episode continues is the
# usual form, but here the bootstrap is applied on `done` — looks inverted;
# cannot confirm from this truncated excerpt.
if done:
delta = reward + self.discountFactor * self.runModel(self.valueModel, newObservation) - self.runModel(self.valueModel, observation)
else :
delta = reward - self.runModel(self.valueModel, observation) # because the value for new obs is 0
self.env.monitor.close()
if upload:
# NOTE(review): `api_key` is undefined in this excerpt.
gym.upload('/tmp/'+experimentId, api_key=api_key)
def main():
env = gym.make('CartPole-v1')
env.monitor.start("cartpole-ex", force=True)
agent = DQN_agent(env)
agent.Q_network_model.load_params(ParaMS_FILE_NAME)
for episode in range(MAX_EPISODES):
state = env.reset()
for t in range(STEPS_PER_EPISODE):
tic = time.time()
env.render()
# print "render time:" + str(time.time() - tic)
tic = time.time()
action = agent.egreedy_action(state)
# print "react time:" + str(time.time() - tic)
next_state, info = env.step(action)
tic = time.time()
agent.learn(state, action, next_state, done)
# print "learn time:" + str(time.time() - tic)
state = next_state
if done:
print("Episode {} finished after {} timesteps with epsilon {}.".format(episode, t+1, agent.epsilon))
break
if SAVE_ParaMS_INTERVAL:
agent.Q_network_model.save_params(ParaMS_FILE_NAME)
# Test every 100 episodes
if episode % 100 == 0:
total_reward = 0
for i in range(TEST_EPISODES):
state = env.reset()
for j in xrange(STEPS_PER_EPISODE):
env.render()
action = agent.react(state) # direct action for test
state, _ = env.step(action)
total_reward += reward
if done:
break
ave_reward = total_reward / TEST_EPISODES
log_string = 'episode: {},Evaluation Average Reward:{}'.format(episode, ave_reward)
logging.debug(log_string)
print log_string
if ave_reward >= STEPS_PER_EPISODE:
break
env.monitor.close()
# gym.upload("cartpole-ex",algorithm_id="x",api_key="x")
# NOTE(review): this snippet is garbled by the page extraction — the body
# of the episode loop is missing, `done` / `next_state` / `episode` are
# never bound, and the final line fuses a logging call with an unrelated
# `api_key="x"` fragment from a different statement. Kept verbatim with
# review notes only; not runnable as-is.
def main():
env = gym.make('CartPole-v1')
# NOTE(review): `done` is undefined here — the original likely passed force=True.
env.monitor.start("cartpole-ex", done)
# print "learn time:" + str(time.time() - tic)
state = next_state
if done:
logging.log(logging.DEBUG, "Episode {} finished after {} timesteps with epsilon {}.".format(episode,api_key="x")
# NOTE(review): truncated excerpt — it ends mid-statement on the
# env.step() unpacking line, where the rest of the function was lost and an
# `api_key="x"` fragment from a later gym.upload call was fused in.
def main():
env = gym.make('Acrobot-v1')
env.monitor.start("Acrobot-ex", force=True)
agent = DQN_agent(env)
# Resume from saved parameters when a checkpoint file exists.
if os.path.exists(ParaMS_FILE_NAME):
agent.Q_network_model.load_params(ParaMS_FILE_NAME)
for episode in range(MAX_EPISODES):
state = env.reset()
for t in range(STEPS_PER_EPISODE):
tic = time.time()
env.render()
# print "render time:" + str(time.time() - tic)
tic = time.time()
action = agent.egreedy_action(state)
# print "react time:" + str(time.time() - tic)
# NOTE(review): truncated — presumably `next_state, reward, done, info = env.step(action)`.
next_state,api_key="x")
# NOTE(review): garbled excerpt — several statements were fused during
# extraction (`agent.learn(state, agent.epsilon))`, the final log_string
# line), `reward` / `done` are used without being assigned, and the tail of
# the function is missing. Kept verbatim with review notes; not runnable.
def main():
env = gym.make('LunarLander-v2')
env.monitor.start("LunarLander-v2", force=True)
agent = DQN_agent(env)
if os.path.exists(ParaMS_FILE_NAME):
agent.Q_network_model.load_params(ParaMS_FILE_NAME)
for episode in range(MAX_EPISODES):
state = env.reset()
# Running-average accumulator for episode reward (see get_accumulator).
acc = get_accumulator()
for t in range(STEPS_PER_EPISODE):
tic = time.time()
env.render()
# print "render time:" + str(time.time() - tic)
tic = time.time()
action = agent.egreedy_action(state)
# print "react time:" + str(time.time() - tic)
# NOTE(review): `reward`/`done` never bound — step() presumably returned 4 values.
next_state, info = env.step(action)
acc(reward)
tic = time.time()
# NOTE(review): fused line — original learn() call arguments were lost here.
agent.learn(state, agent.epsilon))
break
if SAVE_ParaMS_INTERVAL:
agent.Q_network_model.save_params(ParaMS_FILE_NAME)
print acc(0)/t
# Test every 100 episodes
if episode % 100 == 0:
total_reward = get_accumulator()
for i in range(TEST_EPISODES):
state = env.reset()
for j in xrange(STEPS_PER_EPISODE):
env.render()
action = agent.react(state) # direct action for test
state, _ = env.step(action)
total_reward(reward)
if done:
break
ave_reward = total_reward(0) / TEST_EPISODES
# NOTE(review): line truncated mid-string and fused with `api_key="x"`.
log_string = 'episode: {},api_key="x")
# NOTE(review): garbled excerpt — the env.step() unpacking was fused with a
# later `agent.epsilon` expression, the test loop's step/format lines were
# fused (`state, ave_reward)`), and `log_string` is used before any visible
# assignment. Kept verbatim with review notes; not runnable as-is.
def main():
env = gym.make('CartPole-v0')
env.monitor.start("cartpole-ex", force=True)
agent = DQN_agent(env)
for episode in range(MAX_EPISODES):
state = env.reset()
for t in range(STEPS_PER_EPISODE):
tic = time.time()
env.render()
# print "render time:" + str(time.time() - tic)
tic = time.time()
action = agent.egreedy_action(state)
# print "react time:" + str(time.time() - tic)
# NOTE(review): fused line — originally an env.step() unpack plus a print/format call.
next_state, agent.epsilon))
break
# Test every 100 episodes
if episode % 100 == 0:
record_filename = 'cartpole-experiment-{}'.format(episode)
is_record = episode % RECORD_INTERVAL == 0 #and episode != 0
if is_record:
# env.monitor.start(record_filename,force=True)
agent.Q_network_model.save_params("current_params.params")
total_reward = 0
for i in range(TEST_EPISODES):
state = env.reset()
for j in xrange(STEPS_PER_EPISODE):
env.render()
action = agent.react(state) # direct action for test
# NOTE(review): fused line — env.step() unpack merged with a later format argument.
state, ave_reward)
logging.debug(log_string)
print log_string
if ave_reward >= 200:
break
if is_record:
pass
env.monitor.close()
# gym.upload("cartpole-ex",api_key="x")
def __init__(self, master):
    """Build the NEAT control-panel Tk window.

    Bug fixes: Tkinter's integer variable class is ``IntVar`` — the
    original called the undefined name ``Intvar`` (three times) — and the
    matplotlib Tk canvas class is ``FigureCanvasTkAgg``, not
    ``figureCanvasTkAgg``. Both would raise ``NameError`` at startup.
    (Assumes the usual ``from Tkinter import *`` and
    ``FigureCanvasTkAgg`` imports at file top — confirm.)
    """
    self.master = master
    self.frame = Frame(self.master, height=1000, width=450)
    self.frame.grid()
    # Jobs label / entry.
    self.envLabel = Label(self.master, text="Jobs: ").grid(row=1, column=0, sticky=W)
    self.envNum = IntVar()
    self.envNumEntry = Entry(self.master, textvariable=self.envNum)
    self.envNumEntry.insert(END, '2')
    self.envNum.set('2')
    self.envNumEntry.grid(row=1, sticky=E)
    # Population label / entry.
    self.populationLabel = Label(self.master, text="Population")
    self.populationLabel.grid(row=2, sticky=W)
    self.population = IntVar()
    self.populationEntry = Entry(self.master, textvariable=self.population)
    self.populationEntry.insert(END, '300')
    self.population.set('300')
    self.populationEntry.grid(row=2, sticky=E)
    # Pool save / load buttons.
    self.fileSaverButton = Button(self.frame, text="save pool", command=self.saveFile)
    self.fileSaverButton.grid(row=2, column=1)
    self.fileLoaderButton = Button(self.frame, text="load pool", command=self.loadFile)
    self.fileLoaderButton.grid(row=2, column=2)
    # Run button.
    self.runButton = Button(self.frame, text="start run", command=self.toggleRun)
    self.runButton.grid(row=2, column=3)
    # Play-best button.
    self.playBestButton = Button(self.frame, text='play best', command=self.playBest)
    self.playBestButton.grid(row=2, column=4)
    # Upload button.
    self.uploadButton = Button(self.frame, text="upload", command=self.handleUpload)
    self.uploadButton.grid(row=2, column=5)
    # Attempts label / entry (attribute names and label text keep the
    # original "attemps" spelling so existing references still work).
    self.attempsLabel = Label(self.master, text="attemps")
    self.attempsLabel.grid(row=3, sticky=W)
    self.attemps = IntVar()
    self.attempsEntry = Entry(self.master, textvariable=self.attemps)
    self.attempsEntry.insert(END, '1')
    self.attemps.set('1')
    self.attempsEntry.grid(row=3, sticky=E)
    # Environment name label / entry.
    self.envLabel = Label(self.master, text="enviroment")
    self.envLabel.grid(row=4, sticky=W)
    self.envEntry = Entry(self.master)
    self.envEntry.insert(END, 'CartPole-v1')
    self.envEntry.grid(row=4, sticky=E)
    # Run-state bookkeeping.
    self.netProccess = None
    self.running = False
    self.poolInitialized = False
    self.pool = None
    self.lastPopulation = []
    self.plotDictionary = {}
    self.plotData = []
    self.genomeDictionary = {}
    self.specieID = 0
    # Stacked-area species plot embedded in the Tk window.
    self.fig, self.ax = plt.subplots(figsize=(10, 6))
    self.ax.stackplot([], [], baseline='wiggle')
    canvas = FigureCanvasTkAgg(self.fig, self.master)
    canvas.get_tk_widget().grid(row=5, rowspan=4, sticky="nesw")
def play(self, test_ep, n_step=10000, n_episode=100):
    """Run a trained agent for `n_episode` episodes and report the best score.

    Bug fixes: ``env.step`` yields ``(observation, reward, terminal, info)``
    — the original dropped ``reward``, which is used below (NameError) —
    and the debug format string has five placeholders but was given only
    three values (TypeError).
    """
    tf.initialize_all_variables().run()
    self.stat.load_model()
    self.target_network.run_copy()
    if not self.env.display:
        gym_dir = '/tmp/%s-%s' % (self.env_name, get_time())
        env = gym.wrappers.Monitor(self.env.env, gym_dir)
    best_reward, best_idx, best_count = 0, 0, 0
    # Support Python 2 (xrange) and Python 3 (range).
    try:
        itr = xrange(n_episode)
    except NameError:
        itr = range(n_episode)
    for idx in itr:
        observation, terminal = self.new_game()
        current_reward = 0
        for _ in range(self.history_length):
            self.history.add(observation)
        for self.t in tqdm(range(n_step), ncols=70):
            # 1. predict
            action = self.predict(self.history.get(), test_ep)
            # 2. act — fixed: unpack all four values from step().
            observation, reward, terminal, info = self.env.step(action, is_training=False)
            # 3. observe
            q, loss, is_update = self.observe(observation, terminal)
            # Fixed: supply all five values the format string expects.
            logger.debug("a: %d,r: %d,t: %d,q: %.4f,l: %.2f" % \
                (action, reward, terminal, np.mean(q), loss))
            current_reward += reward
            if terminal:
                break
        if current_reward > best_reward:
            best_reward = current_reward
            best_idx = idx
            best_count = 0
        elif current_reward == best_reward:
            best_count += 1
        print ("="*30)
        print (" [%d] Best reward : %d (dup-percent: %d/%d)" % (best_idx, best_reward, best_count, n_episode))
        print ("="*30)
    #if not self.env.display:
        #gym.upload(gym_dir,writeup='https://github.com/devsisters/DQN-tensorflow',api_key='')
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。