In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

More evaluation methods for classification

In [2]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

digits = load_digits()
X, y = digits.data, digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)

print("Accuracy: %f" % classifier.score(X_test, y_test))
Accuracy: 0.966667
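
The score method of a classifier computes plain accuracy, so the same number should come out of accuracy_score applied to the stored predictions. A quick sanity-check sketch (not part of the original run):

In [ ]:
from sklearn.metrics import accuracy_score

# score() above is plain accuracy, so this should print the same value.
print("Accuracy: %f" % accuracy_score(y_test, y_test_pred))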

Confusion matrices

In [3]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_test_pred)
Out[3]:
array([[42,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0, 36,  0,  0,  0,  0,  0,  0,  0,  1],
       [ 0,  0, 38,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 44,  0,  1,  0,  0,  1,  0],
       [ 0,  0,  0,  0, 55,  0,  0,  0,  0,  0],
       [ 0,  0,  1,  0,  0, 56,  1,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  1, 44,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 40,  0,  1],
       [ 0,  2,  0,  0,  0,  1,  0,  0, 35,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  3, 45]])
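
The off-diagonal entries count misclassifications; as a small sketch, the most frequently confused (true, predicted) pair can be read off by masking the diagonal:

In [ ]:
cm = confusion_matrix(y_test, y_test_pred)
# Zero out the diagonal so only misclassifications remain,
# then locate the largest remaining count.
off_diag = cm - np.diag(np.diag(cm))
true_label, pred_label = np.unravel_index(off_diag.argmax(), off_diag.shape)
print("Most common confusion: true %d predicted as %d (%d times)"
      % (true_label, pred_label, off_diag[true_label, pred_label]))
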
In [4]:
plt.imshow(confusion_matrix(y_test, y_test_pred), cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.xlabel("Predicted label")
plt.xticks(range(10))
plt.yticks(range(10))
plt.ylabel("True label")
Out[4]:
<matplotlib.text.Text at 0x7f2b3d9bc5f8>
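
Because the classes have different support, it can help to normalize each row so the entries become per-class fractions (the diagonal then shows per-class recall). A sketch:

In [ ]:
cm = confusion_matrix(y_test, y_test_pred)
# Divide each row by its sum: entry (i, j) becomes the fraction of
# true class i that was predicted as class j.
cm_normalized = cm.astype(float) / cm.sum(axis=1, keepdims=True)
plt.imshow(cm_normalized, cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.xticks(range(10))
plt.yticks(range(10))
plt.xlabel("Predicted label")
plt.ylabel("True label")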

Binary tasks

In [5]:
y_even = y % 2  # label is 0 for even digits, 1 for odd ones
X_train, X_test, y_train, y_test = train_test_split(X, y_even, random_state=42)

classifier = LinearSVC(random_state=0).fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)
In [6]:
confusion_matrix(y_test, y_test_pred)
Out[6]:
array([[209,  10],
       [ 31, 200]])

Binary confusion matrix:

                   predicted positive      predicted negative
actual positive    True Positive (TP)      False Negative (FN)
actual negative    False Positive (FP)     True Negative (TN)

$$ \text{precision} = \frac{TP}{FP + TP} $$

$$ \text{recall} = \frac{TP}{FN + TP} $$

$$ \text{accuracy} = \frac{TP + TN}{FP + FN + TP + TN} $$

$$ f_1 = 2 \, \frac{\text{precision} \cdot \text{recall}}{\text{precision} + \text{recall}} $$
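
These formulas can be checked directly against the matrix entries. Note that confusion_matrix orders labels ascending, so the positive class (label 1) sits in the last row and column and the binary matrix flattens to (TN, FP, FN, TP), unlike the textbook layout above. A sketch:

In [ ]:
from sklearn.metrics import precision_score, recall_score, f1_score

# sklearn's binary confusion matrix flattens to (TN, FP, FN, TP).
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred).ravel()
precision = tp / float(fp + tp)
recall = tp / float(fn + tp)
f1 = 2 * precision * recall / (precision + recall)
print("precision %f (sklearn %f)" % (precision, precision_score(y_test, y_test_pred)))
print("recall    %f (sklearn %f)" % (recall, recall_score(y_test, y_test_pred)))
print("f1        %f (sklearn %f)" % (f1, f1_score(y_test, y_test_pred)))
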
In [7]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_test_pred))
             precision    recall  f1-score   support

          0       0.87      0.95      0.91       219
          1       0.95      0.87      0.91       231

avg / total       0.91      0.91      0.91       450
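
The numbers in the report are also available programmatically, one array entry per class; a short sketch using precision_recall_fscore_support:

In [ ]:
from sklearn.metrics import precision_recall_fscore_support

# Same per-class numbers as the printed report, as arrays.
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_test_pred)
print("per-class precision:", precision)
print("per-class recall:   ", recall)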
