import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
wine = pd.read_csv("C:\\Users\\董润泽\\Desktop\\winequality-red.csv", sep=";")
wine.head()
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  alcohol  quality
0            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4        5
1            7.8              0.88         0.00             2.6      0.098                 25.0                  67.0   0.9968  3.20       0.68      9.8        5
2            7.8              0.76         0.04             2.3      0.092                 15.0                  54.0   0.9970  3.26       0.65      9.8        5
3           11.2              0.28         0.56             1.9      0.075                 17.0                  60.0   0.9980  3.16       0.58      9.8        6
4            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4        5
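The DataFrame summary that follows was produced by a cell whose source did not survive extraction; it is almost certainly a plain structure check along these lines (my reconstruction, assuming the standard pandas call):

wine.info()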
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
fixed acidity 1599 non-null float64
volatile acidity 1599 non-null float64
citric acid 1599 non-null float64
residual sugar 1599 non-null float64
chlorides 1599 non-null float64
free sulfur dioxide 1599 non-null float64
total sulfur dioxide 1599 non-null float64
density 1599 non-null float64
pH 1599 non-null float64
sulphates 1599 non-null float64
alcohol 1599 non-null float64
quality 1599 non-null int64
dtypes: float64(11), int64(1)
memory usage: 150.0 KB
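The zero counts below likewise look like the output of a missing-value check; a minimal sketch of the presumed cell (my reconstruction):

wine.isnull().sum()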
fixed acidity 0
volatile acidity 0
citric acid 0
residual sugar 0
chlorides 0
free sulfur dioxide 0
total sulfur dioxide 0
density 0
pH 0
sulphates 0
alcohol 0
quality 0
dtype: int64
The integer quality scores are next collapsed into two classes: anything up to 6.5 is labelled 'bad' and anything above 6.5 (up to 8) is labelled 'good'.

bins = (2, 6.5, 8)
group_names = ['bad', 'good']
wine['quality'] = pd.cut(wine['quality'], bins=bins, labels=group_names)
wine['quality'].unique()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-13-99255379e85c> in <module>
      1 bins = (2, 6.5, 8)
      2 group_names=['bad', 'good']
----> 3 wine['quality'] = pd.cut(wine['quality'], bins = bins, labels = group_names)
      4 wine['quality'].unique()
TypeError: '<' not supported between instances of 'float' and 'str'

This traceback shows up when the binning cell is executed a second time: after the first run wine['quality'] already holds the string labels 'bad'/'good', so pd.cut can no longer compare the float bin edges against the column. Re-running the notebook from a freshly loaded DataFrame avoids it.
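A small defensive variant (my addition, not part of the original post) only applies the conversion while the column is still numeric, so the cell can be re-run safely:

# Hypothetical guard: bin only if quality still holds the raw integer scores.
if pd.api.types.is_numeric_dtype(wine['quality']):
    wine['quality'] = pd.cut(wine['quality'], bins=bins, labels=group_names)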
After a successful run of the binning cell, wine.head() shows that each of the first five wines falls into the 'bad' class:

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  alcohol  quality
0            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4      bad
1            7.8              0.88         0.00             2.6      0.098                 25.0                  67.0   0.9968  3.20       0.68      9.8      bad
2            7.8              0.76         0.04             2.3      0.092                 15.0                  54.0   0.9970  3.26       0.65      9.8      bad
3           11.2              0.28         0.56             1.9      0.075                 17.0                  60.0   0.9980  3.16       0.58      9.8      bad
4            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4      bad
label_quality = LabelEncoder()
wine['quality'] = label_quality.fit_transform(wine['quality'])
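LabelEncoder assigns integers in alphabetical order of the class names, so 'bad' becomes 0 and 'good' becomes 1. A quick check (my addition, not in the original notebook) makes the mapping explicit:

# classes_ lists the labels in encoding order: the index is the integer code.
print(list(label_quality.classes_))   # ['bad', 'good']  ->  bad = 0, good = 1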
With the labels encoded, quality is now a 0/1 integer column (first ten rows):

   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  alcohol  quality
0            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4        0
1            7.8              0.88         0.00             2.6      0.098                 25.0                  67.0   0.9968  3.20       0.68      9.8        0
2            7.8              0.76         0.04             2.3      0.092                 15.0                  54.0   0.9970  3.26       0.65      9.8        0
3           11.2              0.28         0.56             1.9      0.075                 17.0                  60.0   0.9980  3.16       0.58      9.8        0
4            7.4              0.70         0.00             1.9      0.076                 11.0                  34.0   0.9978  3.51       0.56      9.4        0
5            7.4              0.66         0.00             1.8      0.075                 13.0                  40.0   0.9978  3.51       0.56      9.4        0
6            7.9              0.60         0.06             1.6      0.069                 15.0                  59.0   0.9964  3.30       0.46      9.4        0
7            7.3              0.65         0.00             1.2      0.065                 15.0                  21.0   0.9946  3.39       0.47     10.0        1
8            7.8              0.58         0.02             2.0      0.073                  9.0                  18.0   0.9968  3.36       0.57      9.5        1
9            7.5              0.50         0.36             6.1      0.071                 17.0                 102.0   0.9978  3.35       0.80     10.5        0
wine['quality'].value_counts()
0 1382
1 217
Name: quality, dtype: int64
The two classes are clearly imbalanced (1382 'bad' vs 217 'good'), which is worth keeping in mind when reading the per-class recall figures later on. A countplot visualises the same distribution:

sns.countplot(wine['quality'])
<matplotlib.axes._subplots.AxesSubplot at 0x2076c534cc0>
X = wine.drop('quality', axis=1)
y = wine['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
The scaler is fitted on the training split only and then applied to the test split, so no information from the test set leaks into the preprocessing.

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
Random Forest Classifier

rfc = RandomForestClassifier(n_estimators=600)
rfc.fit(X_train, y_train)
pred_rfc = rfc.predict(X_test)
print(classification_report(y_test, pred_rfc))
print(confusion_matrix(y_test, pred_rfc))
precision recall f1-score support
0 0.92 0.97 0.94 273
1 0.73 0.51 0.60 47
accuracy 0.90 320
macro avg 0.82 0.74 0.77 320
weighted avg 0.89 0.90 0.89 320
[[264 9]
[ 23 24]]
SVM classifier

clf = svm.SVC()
clf.fit(X_train, y_train)
pred_clf = clf.predict(X_test)
print(classification_report(y_test, pred_clf))
print(confusion_matrix(y_test, pred_clf))
precision recall f1-score support
0 0.88 0.98 0.93 273
1 0.71 0.26 0.37 47
accuracy 0.88 320
macro avg 0.80 0.62 0.65 320
weighted avg 0.86 0.88 0.85 320
[[268 5]
[ 35 12]]
Neural Network

mlpc = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter=500)
mlpc.fit(X_train, y_train)
pred_mlpc = mlpc.predict(X_test)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\neural_network\multilayer_perceptron.py:566: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (500) reached and the optimization hasn't converged yet.
% self.max_iter, ConvergenceWarning)
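The warning only means the stochastic optimizer hit the max_iter=500 cap; all results reported below come from that setting. A possible variant (my suggestion, not part of the original post) gives it a larger budget so training actually converges:

# Illustrative only: a larger iteration budget usually removes the warning.
mlpc_converged = MLPClassifier(hidden_layer_sizes=(11, 11, 11), max_iter=2000)
mlpc_converged.fit(X_train, y_train)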
print(classification_report(y_test, pred_clf))   # note: pred_clf holds the SVM's predictions; pass pred_mlpc here to score the neural network
print(confusion_matrix(y_test, pred_clf))
precision recall f1-score support
0 0.88 0.98 0.93 273
1 0.71 0.26 0.37 47
accuracy 0.88 320
macro avg 0.80 0.62 0.65 320
weighted avg 0.86 0.88 0.85 320
[[268 5]
[ 35 12]]
from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, pred_rfc))
print(accuracy_score(y_test, pred_mlpc))
print(accuracy_score(y_test, pred_clf))
0.875
0.884375
0.875
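As a sanity check (my addition), each accuracy can be read straight off the corresponding confusion matrix: the sum of the diagonal divided by the total number of test samples. For the SVM result above:

# (true negatives + true positives) / all test samples
svm_accuracy = (268 + 12) / (268 + 5 + 35 + 12)   # = 280 / 320
print(svm_accuracy)                               # 0.875, matching accuracy_score(y_test, pred_clf)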
Please credit the source when reposting. You are welcome to check the sources cited in this article and to point out anything that is wrong or unclear, either in the comments below or by email to 2470290795@qq.com.