"""Train an LSTM classifier on sliding windows of a univariate time series.

The script reads the series values from ``time-series.csv`` and the per-sample
class labels from ``time-series-as-class.csv``, scales the values to [0, 1],
splits both chronologically (67% train / 33% test), builds look-back windows,
fits a small LSTM network and finally predicts on both splits.
"""
import numpy as np
import pandas as pd
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# Number of distinct class labels; labels are expected to be 1-based.
NUM_CLASSES = 11


def create_dataset(dataset, datasetClass, look_back):
    """Build sliding-window inputs and their one-hot targets.

    Args:
        dataset: 2-D array of samples; only column 0 is used.
        datasetClass: 2-D one-hot array laid out as (classes, samples).
        look_back: number of consecutive samples in each input window.

    Returns:
        A ``(windows, targets)`` pair of arrays. ``windows`` has shape
        ``(n, look_back)`` and ``targets`` has shape ``(n, classes, 1)``,
        where ``n = len(dataset) - look_back - 1``. The target of a window is
        the class column of the sample immediately following that window.
    """
    window_count = len(dataset) - look_back - 1
    datax = [dataset[i:i + look_back, 0] for i in range(window_count)]
    dataY = [
        datasetClass[:, i + look_back:i + look_back + 1]
        for i in range(window_count)
    ]
    return np.array(datax), np.array(dataY)


def one_hot_encode(dataset, num_classes=NUM_CLASSES):
    """One-hot encode 1-based integer labels into a (classes, samples) array.

    Args:
        dataset: integer array with one label (row) per sample.
        num_classes: number of rows in the encoding (defaults to 11).

    Returns:
        Integer array of shape ``(num_classes, len(dataset))`` holding a 1 in
        row ``label - 1`` of each sample's column.
    """
    data = np.zeros((num_classes, len(dataset)), dtype='int')
    for i, label in enumerate(dataset):
        # Labels are 1-based, rows are 0-based. NOTE: a label of 0 would
        # silently wrap around to the last row via negative indexing.
        data[label - 1, i] = 1
    return data


# Set a seed for repeatable results.
# NOTE(review): this seeds NumPy only; if the Keras backend keeps its own RNG
# it would need to be seeded separately -- confirm.
np.random.seed(12)

dataframe = pd.read_csv('time-series.csv', usecols=[1], engine='python')
dataset = dataframe.values
dataset = dataset.astype('float32')

# NOTE(review): the write-up accompanying this script reports that a large
# accuracy gap between platforms was resolved by fixing this CSV file rather
# than the code -- verify the file's contents before suspecting the model.
dataframeClass = pd.read_csv('time-series-as-class.csv', engine='python')
datasetClass = dataframeClass.values
datasetClass = datasetClass.astype('int')
datasetClass = one_hot_encode(datasetClass)

# Normalize input values to the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)

# Chronological train/test split (no shuffling).
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
trainClass = datasetClass[:, 0:train_size]
testClass = datasetClass[:, train_size:len(dataset)]

# Set up sliding windows.
look_back = 150
trainX, trainY = create_dataset(train, trainClass, look_back)
testX, testY = create_dataset(test, testClass, look_back)

# Reformat for the network: the LSTM below is declared with
# input_shape=(1, look_back), so inputs must be (samples, 1, look_back).
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
# Fix: testX was previously reshaped to 2-D (samples, look_back), which does
# not match the 3-D input the model is built for.
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
# Drop the trailing singleton axis: (samples, classes, 1) -> (samples, classes).
trainY = np.squeeze(trainY, 2)
testY = np.squeeze(testY, 2)

# Create and fit the LSTM network.
model = Sequential()
model.add(LSTM(15, input_shape=(1, look_back)))
model.add(Dense(22, activation='tanh'))
model.add(Dropout(0.2))
model.add(Dense(NUM_CLASSES, activation='softmax'))
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['categorical_accuracy'],
)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would emit a stray "None").
model.summary()
model.fit(trainX, trainY, epochs=90, batch_size=1, verbose=2)

# Make predictions.
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)
我在Ubuntu和Windows上都运行了这段代码。在Windows上测试了keras 2.0.4和2.0.8两个版本,在Ubuntu上使用的是2.0.5(conda提供的最新版本)。
在Windows上准确率为17%,分类交叉熵约为2;它会缓慢收敛,但每次都从这个水平开始。
ubuntu的准确率为98%,分类交叉熵似乎为0,实际上并没有改变
唯一的代码区别是csv文件的路径,csv文件完全相同.什么可能导致如此巨大的差异?
如果差异只有一两个百分点,我可以把它归因于dropout或tf的随机初始化,但现在的差距太大,不可能纯属偶然。
编辑:最终的解决办法是修复分类csv文件。虽然它们是utf-8编码,但显然在Windows中创建的文件还需要其他处理,才能在Linux上正常使用。我不确定是否可以把自己的回答标记为“已接受”。
解决方法
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。