How to fix poor performance of deep Q-learning in algorithmic trading
I implemented deep Q-learning in Python with the Keras framework to reproduce the results of a paper, but it does not work. Here is some information:
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, BatchNormalization
from keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler

def build_dqn(lr, n_actions, input_dims, fc1_dims, fc2_dims, CLIP_GRADIENT=1):
    # set_seed(42)
    model = Sequential([
        Dense(fc1_dims, input_shape=(input_dims,)),  # bias_regularizer=regularizers.l2(1e-4), activity_regularizer=regularizers.l2(1e-5)
        Activation('relu'),
        BatchNormalization(),
        Dense(fc2_dims),
        Activation('relu'),
        Dense(n_actions)])
    model.compile(optimizer=Adam(lr=lr, clipvalue=CLIP_GRADIENT), loss='mse')
    return model
class Agent(object):
    def __init__(self, alpha, gamma, n_actions, input_dims, epsilon, batch_size,
                 epsilon_dec=0.996, epsilon_end=0.01, mem_size=1000000, fname='dqn_model.h5'):
        self.action_space = [i for i in range(n_actions)]
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_min = epsilon_end
        self.batch_size = batch_size
        self.model_file = fname
        self.memory = ReplayBuffer(mem_size, input_dims, n_actions, discrete=True)
        self.q_eval = build_dqn(alpha, n_actions, input_dims, 64, 32)
    def remember(self, state, action, reward, new_state, done):
        self.memory.store_transition(state, action, reward, new_state, done)
    def choose_action(self, state):
        state = state[np.newaxis, :]
        rand = np.random.random()
        if rand < self.epsilon:
            action = np.random.choice(self.action_space)
        else:
            actions = self.q_eval.predict(state)
            action = np.argmax(actions)
        return action
    def learn(self):
        if self.memory.mem_cntr > self.batch_size:
            state, action, reward, new_state, done = \
                self.memory.sample_buffer(self.batch_size)
            action_values = np.array(self.action_space, dtype=np.int8)
            # recover integer indices from one-hot encoded actions
            action_indices = np.dot(action, action_values)
            q_eval = self.q_eval.predict(state)
            q_next = self.q_eval.predict(new_state)
            q_target = q_eval.copy()
            batch_index = np.arange(self.batch_size, dtype=np.int32)
            # done is stored as 1 - terminal flag, so the bootstrap term vanishes at episode end
            q_target[batch_index, action_indices] = reward + \
                self.gamma * np.max(q_next, axis=1) * done
            _ = self.q_eval.fit(state, q_target, verbose=0)
            self.epsilon = self.epsilon * self.epsilon_dec if self.epsilon > \
                self.epsilon_min else self.epsilon_min
    def processstate(self, state):
        # split the raw observation into the price block and the previous position
        n = len(state)
        relative_diff_matrix, prev_position = state[:n - 1], state[n - 1]
        relative_diff_matrix = relative_diff_matrix.reshape((n // 30, 30))
        # relative one-step differences per row, then standardize each row
        relative_diff_matrix = np.diff(relative_diff_matrix) / relative_diff_matrix[:, :-1]
        relative_diff_matrix = StandardScaler().fit_transform(relative_diff_matrix.T).T
        processed_state = relative_diff_matrix.flatten()
        processed_state = np.append(processed_state, prev_position)
        return processed_state
    def processReward(self, reward, rewardClipping=1):
        return np.clip(reward, -rewardClipping, rewardClipping)
    def train_model(self, trainingEnv, n_episodes=1, verbose=0):
        scores = []
        eps_history = []
        for i in range(n_episodes):
            done = False
            score = 0
            observation = trainingEnv.reset()
            observation = self.processstate(observation)
            while not done:
                action = self.choose_action(observation)
                observation_, reward, done, info = trainingEnv.step(action)
                # Remembering episode
                reward = self.processReward(reward)
                observation_ = self.processstate(observation_)
                score += reward
                self.remember(observation, action, reward, observation_, int(done))
                # Remembering episode for the other action => better exploration
                otherAction = int(not bool(action))
                otherReward = self.processReward(info['Reward'])
                otherNextState = self.processstate(info['State'])
                otherDone = info['Done']
                self.remember(observation, otherAction, otherReward, otherNextState, otherDone)
                observation = observation_
                # learning
                self.learn()
            if verbose:
                eps_history.append(self.epsilon)
                scores.append(score)
                avg_score = np.mean(scores[max(0, i - 100):(i + 1)])
                print('episode: ', i, 'score: %.2f' % score, ' average score %.2f' % avg_score)
                trainingEnv.render()
    def save_model(self):
        self.q_eval.save(self.model_file)

    def load_model(self):
        self.q_eval = load_model(self.model_file)
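For completeness: the Agent above depends on a ReplayBuffer class that is not shown in the snippet. Below is a minimal sketch of a buffer compatible with the calls the Agent makes; the constructor signature and the convention of storing done as 1 - terminal (so the bootstrap term in learn() is zeroed at episode end) are assumptions inferred from how the samples are consumed, not the original implementation.

import numpy as np

class ReplayBuffer(object):
    # Minimal replay buffer matching the interface assumed by the Agent above.
    def __init__(self, max_size, input_dims, n_actions, discrete=True):
        self.mem_size = max_size
        self.mem_cntr = 0
        self.discrete = discrete
        self.state_memory = np.zeros((self.mem_size, input_dims))
        self.new_state_memory = np.zeros((self.mem_size, input_dims))
        # actions stored one-hot, so learn() can recover indices with np.dot
        self.action_memory = np.zeros((self.mem_size, n_actions), dtype=np.int8)
        self.reward_memory = np.zeros(self.mem_size)
        # stored as 1 - done so the target's bootstrap term is zeroed at episode end
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.float32)

    def store_transition(self, state, action, reward, new_state, done):
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.new_state_memory[index] = new_state
        if self.discrete:
            one_hot = np.zeros(self.action_memory.shape[1], dtype=np.int8)
            one_hot[action] = 1
            self.action_memory[index] = one_hot
        self.reward_memory[index] = reward
        self.terminal_memory[index] = 1 - int(done)
        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)
        return (self.state_memory[batch], self.action_memory[batch],
                self.reward_memory[batch], self.new_state_memory[batch],
                self.terminal_memory[batch])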
I start with $100 of capital and end up more or less flat over a 20-year horizon (roughly 10,000 steps). I have tried tuning the parameters, but nothing helps.
Here is the main script:
env = TradingEnv(marketSymbol="GOOGL", period=PERIOD_DEFAULT, startingDate=START_DEFAULT,
                 endingDate=END_DEFAULT, columns=COLUMNS, money=100, transactionCosts=0)
lr = 0.0005
# note: epsilon=0.0 together with epsilon_end=0.0 means the agent never explores
agent = Agent(gamma=1, epsilon=0.0, alpha=lr, input_dims=117, n_actions=2,
              batch_size=32, epsilon_end=0.0)
agent.train_model(env)
Solution
I think I have managed to solve the problem: the number of episodes has to be set high enough (not 1); in my case just 30 was enough. However, I still do not know how to backtest the deep Q trading agent effectively!
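For backtesting, one simple approach is to run the trained network greedily over a held-out period: epsilon fixed at 0, no learning and no reward clipping, so the accumulated score reflects the actual trading performance. A minimal sketch, assuming a testingEnv built like the TradingEnv above but on out-of-sample dates and exposing the same reset()/step() interface used in train_model:

def backtest_model(agent, testingEnv):
    # Run the trained policy greedily over a held-out period: no exploration,
    # no learning, and no reward clipping, so the score reflects real P&L.
    observation = agent.processstate(testingEnv.reset())
    done = False
    total_reward = 0.0
    while not done:
        q_values = agent.q_eval.predict(observation[np.newaxis, :])
        action = np.argmax(q_values)
        observation_, reward, done, info = testingEnv.step(action)
        total_reward += reward
        observation = agent.processstate(observation_)
    testingEnv.render()
    return total_reward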