In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

More evaluation methods for classification

In [2]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

digits = load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

print("Accuracy: %f" % classifier.score(X_test, y_test))
Accuracy: 0.966667
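
The score method of a classifier computes plain accuracy, so the same number should come out of accuracy_score applied to the stored predictions. A quick sanity-check sketch (not part of the original run):

In [ ]:
from sklearn.metrics import accuracy_score

# score() above is plain accuracy, so this should print the same value.
print("Accuracy: %f" % accuracy_score(y_test, y_test_pred))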

Confusion matrices

In [3]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_test_pred)
Out[3]:
array([[42,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0, 36,  0,  0,  0,  0,  0,  0,  0,  1],
       [ 0,  0, 38,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 44,  0,  1,  0,  0,  1,  0],
       [ 0,  0,  0,  0, 55,  0,  0,  0,  0,  0],
       [ 0,  0,  1,  0,  0, 56,  1,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  1, 44,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 40,  0,  1],
       [ 0,  2,  0,  0,  0,  1,  0,  0, 35,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  3, 45]])
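
The off-diagonal entries count misclassifications; as a small sketch, the most frequently confused (true, predicted) pair can be read off by masking the diagonal:

In [ ]:
cm = confusion_matrix(y_test, y_test_pred)
# Zero out the diagonal so only misclassifications remain,
# then locate the largest remaining count.
off_diag = cm - np.diag(np.diag(cm))
true_label, pred_label = np.unravel_index(off_diag.argmax(), off_diag.shape)
print("Most common confusion: true %d predicted as %d (%d times)"
      % (true_label, pred_label, off_diag[true_label, pred_label]))
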
In [4]:
plt.imshow(confusion_matrix(y_test, y_test_pred), cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.xlabel("Predicted label")
plt.xticks(range(10))
plt.yticks(range(10))
plt.ylabel("True label")
Out[4]:
<matplotlib.text.Text at 0x7f2b3d9bc5f8>
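
Because the classes have different support, it can help to normalize each row so the entries become per-class fractions (the diagonal then shows per-class recall). A sketch:

In [ ]:
cm = confusion_matrix(y_test, y_test_pred)
# Divide each row by its sum: entry (i, j) becomes the fraction of
# true class i that was predicted as class j.
cm_normalized = cm.astype(float) / cm.sum(axis=1, keepdims=True)
plt.imshow(cm_normalized, cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.xticks(range(10))
plt.yticks(range(10))
plt.xlabel("Predicted label")
plt.ylabel("True label")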

Binary tasks

In [5]:
y_even = y % 2  # label is 0 for even digits, 1 for odd ones
X_train, X_test, y_train, y_test = train_test_split(X, y_even, random_state=42)

classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)
In [6]:
confusion_matrix(y_test, y_test_pred)
Out[6]:
array([[209,  10],
       [ 31, 200]])

Binary confusion matrix:

                   predicted positive      predicted negative
actual positive    True Positive (TP)      False Negative (FN)
actual negative    False Positive (FP)     True Negative (TN)

$$ \text{precision} = \frac{TP}{FP + TP} $$

$$ \text{recall} = \frac{TP}{FN + TP} $$

$$ \text{accuracy} = \frac{TP + TN}{FP + FN + TP + TN} $$

$$ f_1 = 2 \, \frac{\text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}} $$
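
These formulas can be checked directly against the matrix entries. Note that confusion_matrix orders labels ascending, so the positive class (label 1) sits in the last row and column and the binary matrix flattens to (TN, FP, FN, TP), unlike the textbook layout above. A sketch:

In [ ]:
from sklearn.metrics import precision_score, recall_score, f1_score

# sklearn's binary confusion matrix flattens to (TN, FP, FN, TP).
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()
precision = tp / float(fp + tp)
recall = tp / float(fn + tp)
f1 = 2 * precision * recall / (precision + recall)
print("precision %f (sklearn %f)" % (precision, precision_score(y_test, y_test_pred)))
print("recall    %f (sklearn %f)" % (recall, recall_score(y_test, y_test_pred)))
print("f1        %f (sklearn %f)" % (f1, f1_score(y_test, y_test_pred)))
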
In [7]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_test_pred))
             precision    recall  f1-score   support

          0       0.87      0.95      0.91       219
          1       0.95      0.87      0.91       231

avg / total       0.91      0.91      0.91       450
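
The numbers in the report are also available programmatically, one array entry per class; a short sketch using precision_recall_fscore_support:

In [ ]:
from sklearn.metrics import precision_recall_fscore_support

# Same per-class numbers as the printed report, as arrays.
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_test_pred)
print("per-class precision:", precision)
print("per-class recall:   ", recall)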
