ValueError: X.shape[1] = 2 应该等于4，训练时的特征数

如何解决ValueError: X.shape[1] = 2 应该等于4，训练时的特征数

我是 Python 新手。我对 numpy 和 array 有一点了解。在我的工作中，我有 1000 行 × 5 列的数据集。前三列作为 X_set，最后一列作为 y_set。但是，当我绘制可视化训练集结果时，我得到了错误。错误如下

  ValueError: X.shape[1] = 2 should be equal to 4,the number of features at training time

这是我的脚本 svm

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as svm

data = pd.read_csv('overdischarge.csv')
data
#Getting features from dataset
data=data.sample(frac=1)
X=data.iloc[:,:-1].values
y=data.iloc[:,-1].values
X=X.astype(float)
#training and testing set size
train_size=int(0.75*data.shape[0])
test_size=int(0.25*data.shape[0])
print("Training set size : "+ str(train_size))
print("Testing set size : "+str(test_size))

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.25,random_state = 0)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

from sklearn.svm import SVC
classifier = SVC(kernel = 'linear',random_state = 0)
classifier.fit(X_train,y_train)

#training set split
X_train=X[0:train_size,:]
y_train=y[0:train_size]
#testing set split 
X_test=X[train_size:,:] 
y_test=y[train_size:]

y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

from sklearn.metrics import confusion_matrix,accuracy_score
cm = confusion_matrix(y_test,y_pred)
print(cm)
accuracy_score(y_test,y_pred)

#visualize the training set 
from matplotlib.colors import ListedColormap
X_set,y_set = X_train,y_train
for i,j in enumerate(np.unique(y_set)):
      plt.scatter(X_set[y_set == j,0],X_set[y_set == j,1],2],c = ListedColormap(('green','orange','red'))(i),label = j,marker='.')
plt.title('Training set Overdischarge')
plt.xlabel('rpn')
plt.ylabel('ALARM')
plt.legend()
plt.show()

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set,y_train
X1,X2 = np.meshgrid(np.arange(start = X_set[:,0].min() - 1,stop = X_set[:,0].max() + 1,step = 0.1),np.arange(start = X_set[:,1].min() - 1,1].max() + 1,step = 0.1))
plt.contourf(X1,X2,classifier.predict(np.array([X1.ravel(),X2.ravel()]).T).reshape(X1.shape),alpha = 0.85,cmap = ListedColormap(('green','yellow','red')))
plt.xlim(X1.min(),X1.max())
plt.ylim(X2.min(),X2.max())
for i,j in enumerate(np.unique(y_set)):
       plt.scatter(X_set[y_set == j,marker='.')
plt.title('Training set')
plt.xlabel('ALARM')
plt.ylabel('rpn')
plt.legend()
plt.show()

当我运行最后一个脚本时，我无法绘制训练结果并出现这样的错误

ValueError                                Traceback (most recent call last)
<ipython-input-24-d51e66447620> in <module>()
  4 X1,5          np.arange(start = X_set[:,step = 0.1))
 ----> 6 plt.contourf(X1,7              alpha = 0.85,'red')))
  8 plt.xlim(X1.min(),X1.max())

 2 frames
 /usr/local/lib/python3.6/dist-packages/sklearn/svm/_base.py in _validate_for_predict(self,X)
 465             raise ValueError("X.shape[1] = %d should be equal to %d,"
 466                              "the number of features at training time" %
 --> 467                              (n_features,self.shape_fit_[1]))
 468         return X
 469 

 ValueError: X.shape[1] = 2 should be equal to 4,the number of features at training time

这是我的数据集

   SEV  OCC DET rpn ALARM
 0  1   2   10  20  0
 1  2   3   9   54  0
 2  3   4   8   96  0
 3  4   5   7   140 0
 4  5   6   6   180 0
... ... ... ... ... ...
995 8   7   5   280 1
996 9   8   4   288 1
997 10  9   3   270 1
998 1   10  2   20  0
999 2   10  1   20  0

为什么我会出错？ numpy 或数组有问题吗？请给我建议。提前致谢。