Source code for kim.pre_analysis.pairwise_analysis
# Pairwise analysis using either mutual information or correlation coefficient
#
# Author: Peishi Jiang <shixijps@gmail.com>
import numpy as np
from .sst import shuffle_test
from .metric_calculator import MetricBase
from tqdm import tqdm
from jaxtyping import Array
[docs]
def pairwise_analysis(
    xdata: Array, ydata: Array, metric_calculator: MetricBase, sst: bool=False,
    ntest: int=100, alpha: float=0.05, n_jobs: int=-1, seed_shuffle: int=1234, verbose: int=0
):
    """Score every (predictor, predictand) column pair with ``metric_calculator``.

    Each column of ``xdata`` is paired with each column of ``ydata``. Without
    the significance test the metric is stored directly; with it, the pair is
    handed to ``shuffle_test`` and both the metric and its significance flag
    are stored.

    Args:
        xdata (array-like): the predictors with shape (Ns, Nx)
        ydata (array-like): the predictands with shape (Ns, Ny)
        metric_calculator (class): callable invoked as ``metric_calculator(x, y)``
            on one predictor column and one predictand column
        sst (bool): whether to run the statistical significance (shuffle) test.
            Defaults to False.
        ntest (int): number of shuffled samples, forwarded to ``shuffle_test``.
            Defaults to 100.
        alpha (float): the significance level, forwarded to ``shuffle_test``.
            Defaults to 0.05.
        n_jobs (int): the number of processors/threads used by joblib,
            forwarded to ``shuffle_test``. Defaults to -1.
        seed_shuffle (int): the random seed forwarded to ``shuffle_test``; the
            same value is passed for every pair. Defaults to 1234.
        verbose (int): the verbosity level (0: normal, 1: debug). Only the
            status message depends on it; the tqdm progress bar is always
            created. Defaults to 0.

    Returns:
        (array, array): the sensitivity of shape (Nx, Ny) and the boolean
        sensitivity mask of the same shape. The mask stays all True when
        ``sst`` is False.
    """
    # Both inputs must describe the same samples (rows).
    assert xdata.shape[0] == ydata.shape[0], \
        "xdata and ydata must be the same number of samples"

    n_predictors, n_predictands = xdata.shape[1], ydata.shape[1]

    # Outputs: metric values start at zero, every pair starts as "significant".
    grid_shape = (n_predictors, n_predictands)
    sensitivity = np.zeros(grid_shape)
    sensitivity_mask = np.ones(grid_shape, dtype=bool)

    if verbose == 1:
        print("Performing pairwise analysis to remove insensitive inputs ...")

    for ix in tqdm(range(n_predictors)):
        predictor = xdata[:, ix]
        for iy in range(n_predictands):
            predictand = ydata[:, iy]
            if sst:
                # shuffle_test yields (metric, significance); store each in
                # its own output array. The fourth positional argument is
                # deliberately left as None, as in the plain pairwise case.
                sensitivity[ix, iy], sensitivity_mask[ix, iy] = shuffle_test(
                    predictor, predictand, metric_calculator, None, ntest, alpha,
                    n_jobs=n_jobs, random_seed=seed_shuffle
                )
            else:
                sensitivity[ix, iy] = metric_calculator(predictor, predictand)

    return sensitivity, sensitivity_mask