卖小孩的咖啡 2020-07-21
#方式1:获取小规模的数据集 import sklearn.datasets as datasets iris = datasets.load_iris() #提取样本数据 feature = iris[‘data‘] target = iris[‘target‘] feature.shape (150, 4) target.shape (150,) target array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) #切分样本数据 from sklearn.model_selection import train_test_split x_train,x_test,y_train,y_test = train_test_split(feature,target,test_size=0.2,random_state=2020) x_train,y_train #训练集数据 x_test,y_test #测试集数据 #方式2:获取较大规模的数据集 datasets.fetch_20newsgroups(data_home=‘数据集保存路径‘,subset=‘all‘)