Source code for qiskit.ml.datasets.gaussian

# -*- coding: utf-8 -*-

# This code is part of Qiskit.
#
# (C) Copyright IBM 2018, 2020.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""
gaussian dataset
"""

import numpy as np
from qiskit.aqua import aqua_globals


[docs]def gaussian(training_size, test_size, n, plot_data=False): """ returns gaussian dataset """ sigma = 1 if n == 2: class_labels = [r'A', r'B'] label_train = np.zeros(2 * (training_size + test_size)) sample_train = [] sample_a = [[0 for x in range(n)] for y in range(training_size + test_size)] sample_b = [[0 for x in range(n)] for y in range(training_size + test_size)] randomized_vector1 = aqua_globals.random.randint(2, size=n) randomized_vector2 = (randomized_vector1 + 1) % 2 for t_r in range(training_size + test_size): for feat in range(n): if randomized_vector1[feat] == 0: sample_a[t_r][feat] = aqua_globals.random.normal(-1 / 2, sigma, None) elif randomized_vector1[feat] == 1: sample_a[t_r][feat] = aqua_globals.random.normal(1 / 2, sigma, None) if randomized_vector2[feat] == 0: sample_b[t_r][feat] = aqua_globals.random.normal(-1 / 2, sigma, None) elif randomized_vector2[feat] == 1: sample_b[t_r][feat] = aqua_globals.random.normal(1 / 2, sigma, None) sample_train = [sample_a, sample_b] for lindex in range(training_size + test_size): label_train[lindex] = 0 for lindex in range(training_size + test_size): label_train[training_size + test_size + lindex] = 1 label_train = label_train.astype(int) sample_train = np.reshape(sample_train, (2 * (training_size + test_size), n)) training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)} test_input = {key: (sample_train[label_train == k, :])[training_size:( training_size + test_size)] for k, key in enumerate(class_labels)} if plot_data: try: import matplotlib.pyplot as plt except ImportError: raise NameError('Matplotlib not installed. Please install it before plotting') for k in range(0, 2): plt.scatter(sample_train[label_train == k, 0][:training_size], sample_train[label_train == k, 1][:training_size]) plt.title("Gaussians") plt.show() return sample_train, training_input, test_input, class_labels elif n == 3: class_labels = [r'A', r'B', r'C'] label_train = np.zeros(3 * (training_size + test_size)) sample_train = [] sample_a = [[0 for x in range(n)] for y in range(training_size + test_size)] sample_b = [[0 for x in range(n)] for y in range(training_size + test_size)] sample_c = [[0 for x in range(n)] for y in range(training_size + test_size)] randomized_vector1 = aqua_globals.random.randint(3, size=n) randomized_vector2 = (randomized_vector1 + 1) % 3 randomized_vector3 = (randomized_vector2 + 1) % 3 for t_r in range(training_size + test_size): for feat in range(n): if randomized_vector1[feat] == 0: sample_a[t_r][feat] = aqua_globals.random.normal(2 * 1 * np.pi / 6, sigma, None) elif randomized_vector1[feat] == 1: sample_a[t_r][feat] = aqua_globals.random.normal(2 * 3 * np.pi / 6, sigma, None) elif randomized_vector1[feat] == 2: sample_a[t_r][feat] = aqua_globals.random.normal(2 * 5 * np.pi / 6, sigma, None) if randomized_vector2[feat] == 0: sample_b[t_r][feat] = aqua_globals.random.normal(2 * 1 * np.pi / 6, sigma, None) elif randomized_vector2[feat] == 1: sample_b[t_r][feat] = aqua_globals.random.normal(2 * 3 * np.pi / 6, sigma, None) elif randomized_vector2[feat] == 2: sample_b[t_r][feat] = aqua_globals.random.normal(2 * 5 * np.pi / 6, sigma, None) if randomized_vector3[feat] == 0: sample_c[t_r][feat] = aqua_globals.random.normal(2 * 1 * np.pi / 6, sigma, None) elif randomized_vector3[feat] == 1: sample_c[t_r][feat] = aqua_globals.random.normal(2 * 3 * np.pi / 6, sigma, None) elif randomized_vector3[feat] == 2: sample_c[t_r][feat] = aqua_globals.random.normal(2 * 5 * np.pi / 6, sigma, None) sample_train = [sample_a, sample_b, sample_c] for lindex in range(training_size + test_size): label_train[lindex] = 0 for lindex in range(training_size + test_size): label_train[training_size + test_size + lindex] = 1 for lindex in range(training_size + test_size): label_train[training_size + test_size + training_size + test_size + lindex] = 2 label_train = label_train.astype(int) sample_train = np.reshape(sample_train, (3 * (training_size + test_size), n)) training_input = {key: (sample_train[label_train == k, :])[:training_size] for k, key in enumerate(class_labels)} test_input = {key: (sample_train[label_train == k, :])[training_size:( training_size + test_size)] for k, key in enumerate(class_labels)} if plot_data: try: import matplotlib.pyplot as plt except ImportError: raise NameError('Matplotlib not installed. Please install it before plotting') for k in range(0, 3): plt.scatter(sample_train[label_train == k, 0][:training_size], sample_train[label_train == k, 1][:training_size]) plt.title("Gaussians") plt.show() return sample_train, training_input, test_input, class_labels else: raise ValueError("Gaussian presently only supports 2 or 3 qubits")