# -*- coding: UTF-8 -*-
import csv
from sklearn.model_selection import train_test_split
# Read in the csv file and put features into list of dict and list of class label
DataSet = open(r'/home/ly/Desktop/CHY/SCIENCE_DATA/Data_Set_01labelDel0Col.csv', 'rb')
reader = csv.reader(DataSet) # 這個函式可以按行讀取內容
headers = reader.next() # 檔案的第一行,注釋掉會在featureList中把表頭也列印出來
# print(headers)
# 創建空串列
featureList = []
labelList = []
for row in reader:
labelList.append(row[len(row) - 1]) # 給labelList增加一列標簽值元素,將最后一列元素添加到labelList
rowDict = {}
for i in range(0, len(row) - 1): # 小回圈在大回圈里面,所以先回圈完小回圈,在繼續下一個大回圈
rowDict[i] = row[i] # row[i]表示某一行(row)的第i個數
featureList.append(rowDict)
FeatureList = []
for s in featureList:
ChangeStrToFloat1 = {}
for t in s:
ChangeStrToFloat1[t] = float(s[t])
FeatureList.append(ChangeStrToFloat1)
print FeatureList # <type'list'>
dummyY = [{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:0},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1},{0:1}]
X_train, X_test, y_train, y_test = train_test_split(FeatureList, dummyY, test_size=0.25, random_state=None)
錯誤提示:
X_train, X_test, y_train, y_test = train_test_split(FeatureList, dummyY, test_size=0.25, random_state=None)
ValueError: Found input variables with inconsistent numbers of samples: [3384, 47]
我想應該是FeatureList的問題吧,這個串列里有47個字典,每個字典里有72個元素,47*72=3384.
然而我還是不知道怎么修正.......
uj5u.com熱心網友回復:
FeatureList,dummyY,維數不一致轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/83977.html
標籤:其他開發語言
