# Source code for qiskit.aqua.components.multiclass_extensions.all_pairs

# -*- coding: utf-8 -*-

# This code is part of Qiskit.
#
# (C) Copyright IBM 2018, 2020.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""
The All-Pairs multiclass extension.
"""

import logging

import numpy as np
from sklearn.utils.multiclass import _ovr_decision_function

from .multiclass_extension import MulticlassExtension

logger = logging.getLogger(__name__)

# pylint: disable=invalid-name


class AllPairs(MulticlassExtension):
    """
    The All-Pairs multiclass extension.

    In the **all-pairs** reduction, one trains :math:`k(k-1)/2` binary classifiers for a
    :math:`k`-way multiclass problem; each receives the samples of a pair of classes from the
    original training set, and must learn to distinguish these two classes. At prediction
    time, a **weighted voting scheme** is used: all :math:`k(k-1)/2` classifiers are applied
    to an unseen sample, and each class gets assigned the sum of all the scores obtained by
    the various classifiers. The combined classifier returns as a result the class getting
    the highest value.
    """

    def __init__(self) -> None:
        super().__init__()
        # Sorted array of distinct training labels; set by train().
        self.classes_ = None
        # Nested dict {i: {j: estimator}} with one binary classifier per
        # class pair (i < j); populated by train().
        self.estimators = None

    def train(self, x, y):
        """
        Training multiple estimators each for distinguishing a pair of classes.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels

        Raises:
            ValueError: can not be fit when only one class is present.
        """
        self.classes_ = np.unique(y)
        if len(self.classes_) == 1:
            raise ValueError("can not be fit when only one class is present.")
        n_classes = self.classes_.shape[0]
        self.estimators = {}
        # k * (k - 1) // 2 pairwise estimators; floor division keeps the count an int
        # (the original `/ 2` logged a float such as 3.0).
        logger.info("Require %s estimators.", n_classes * (n_classes - 1) // 2)
        # NOTE(review): the pair filters below compare y against the *indices* i and j,
        # so labels are assumed to already be 0..n_classes-1 — confirm with callers.
        for i in range(n_classes):
            estimators_from_i = {}
            for j in range(i + 1, n_classes):
                estimator = self.estimator_cls(*self.params)
                # Select only the samples belonging to classes i and j.
                cond = np.logical_or(y == i, y == j)
                indcond = np.arange(x.shape[0])[cond]
                x_filtered = x[indcond]
                y_filtered = y[indcond]
                # Relabel the pair as a binary {0, 1} problem (i -> 0, j -> 1).
                # y_filtered is a fancy-indexing copy, so the caller's y is untouched.
                y_filtered[y_filtered == i] = 0
                y_filtered[y_filtered == j] = 1
                estimator.fit(x_filtered, y_filtered)
                estimators_from_i[j] = estimator
            self.estimators[i] = estimators_from_i

    def test(self, x, y):
        """
        Testing multiple estimators each for distinguishing a pair of classes.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels

        Returns:
            float: accuracy
        """
        predicted = self.predict(x)
        total = len(predicted)
        wrong = np.sum(predicted != y)
        logger.debug("%d out of %d are wrong", wrong, total)
        return 1. - (wrong * 1.0 / total)

    def predict(self, x):
        """
        Applying multiple estimators for prediction.

        Args:
            x (numpy.ndarray): NxD array

        Returns:
            numpy.ndarray: predicted labels, Nx1 array
        """
        predictions = []
        confidences = []
        for i in self.estimators:
            estimators_from_i = self.estimators[i]
            for j in estimators_from_i:
                estimator = estimators_from_i[j]
                confidence = np.ravel(estimator.decision_function(x))
                # np.int was deprecated in NumPy 1.20 and removed in 1.24;
                # the builtin int is the equivalent dtype.
                indices = (confidence > 0).astype(int)
                prediction = self.classes_[indices]
                predictions.append(prediction.reshape(-1, 1))
                confidences.append(confidence.reshape(-1, 1))
        predictions = np.hstack(predictions)
        confidences = np.hstack(confidences)
        # Weighted pairwise voting: sum per-class scores across all k(k-1)/2
        # classifiers, then pick the class with the highest aggregate.
        y = _ovr_decision_function(predictions, confidences, len(self.classes_))
        return self.classes_[y.argmax(axis=1)]