# -*- coding: utf-8 -*-
# This code is part of Qiskit.
#
# (C) Copyright IBM 2018, 2020.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.
"""
The All-Pairs multiclass extension.
"""
import logging
import numpy as np
from sklearn.utils.multiclass import _ovr_decision_function
from .multiclass_extension import MulticlassExtension
logger = logging.getLogger(__name__)
# pylint: disable=invalid-name
class AllPairs(MulticlassExtension):
    """
    The All-Pairs multiclass extension.

    In the **all-pairs** reduction, one trains :math:`k(k-1)/2` binary classifiers for a
    :math:`k`-way multiclass problem; each receives the samples of a pair of classes from the
    original training set, and must learn to distinguish these two classes. At prediction time,
    a **weighted voting scheme** is used: all :math:`k(k-1)/2` classifiers are applied to an
    unseen sample, and each class gets assigned the sum of all the scores obtained by the
    various classifiers. The combined classifier returns as a result the class getting the
    highest value.
    """

    def __init__(self) -> None:
        super().__init__()
        # Distinct class labels observed during training (set by ``train``).
        self.classes_ = None
        # Nested dict {i: {j: estimator}} with one binary estimator per class pair, i < j.
        self.estimators = None

    def train(self, x, y):
        """
        Training multiple estimators each for distinguishing a pair of classes.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels

        Raises:
            ValueError: can not be fit when only one class is present.
        """
        self.classes_ = np.unique(y)
        if len(self.classes_) == 1:
            raise ValueError("can not be fit when only one class is present.")
        n_classes = self.classes_.shape[0]
        self.estimators = {}
        # k*(k-1) is always even, so integer division logs the exact count
        # (the original true division logged it as a float, e.g. "3.0").
        logger.info("Require %s estimators.", n_classes * (n_classes - 1) // 2)
        for i in range(n_classes):
            estimators_from_i = {}
            for j in range(i + 1, n_classes):
                # A fresh binary estimator per (i, j) pair; estimator_cls/params
                # come from the MulticlassExtension base configuration.
                estimator = self.estimator_cls(*self.params)
                # NOTE(review): comparing y against the indices i/j assumes labels
                # are the integers 0..k-1 (so self.classes_[i] == i) — confirm
                # against callers if labels can be arbitrary values.
                cond = np.logical_or(y == i, y == j)
                indcond = np.arange(x.shape[0])[cond]
                x_filtered = x[indcond]
                # Fancy indexing copies, so relabeling to {0, 1} below does not
                # mutate the caller's ``y``.
                y_filtered = y[indcond]
                y_filtered[y_filtered == i] = 0
                y_filtered[y_filtered == j] = 1
                estimator.fit(x_filtered, y_filtered)
                estimators_from_i[j] = estimator
            self.estimators[i] = estimators_from_i

    def test(self, x, y):
        """
        Testing multiple estimators each for distinguishing a pair of classes.

        Args:
            x (numpy.ndarray): input points
            y (numpy.ndarray): input labels

        Returns:
            float: accuracy
        """
        predicted = self.predict(x)
        total = len(predicted)
        wrong = np.sum(predicted != y)
        logger.debug("%d out of %d are wrong", wrong, total)
        return 1.0 - wrong / total

    def predict(self, x):
        """
        Applying multiple estimators for prediction.

        Args:
            x (numpy.ndarray): NxD array

        Returns:
            numpy.ndarray: predicted labels, Nx1 array
        """
        predictions = []
        confidences = []
        for i in self.estimators:
            estimators_from_i = self.estimators[i]
            for j in estimators_from_i:
                estimator = estimators_from_i[j]
                confidence = np.ravel(estimator.decision_function(x))
                # np.int was deprecated in NumPy 1.20 and removed in 1.24; the
                # builtin ``int`` is the identical dtype it aliased.
                indices = (confidence > 0).astype(int)
                prediction = self.classes_[indices]
                predictions.append(prediction.reshape(-1, 1))
                confidences.append(confidence.reshape(-1, 1))
        predictions = np.hstack(predictions)
        confidences = np.hstack(confidences)
        # NOTE(review): _ovr_decision_function is a private sklearn helper; its
        # signature may change between sklearn releases.
        y = _ovr_decision_function(predictions,
                                   confidences, len(self.classes_))
        return self.classes_[y.argmax(axis=1)]