Skip to content
Snippets Groups Projects
Commit 18cbaee5 authored by Alexander Jason's avatar Alexander Jason
Browse files

fix: remake psi algorithm

parent 2b3a8b85
Branches
No related merge requests found
......@@ -8,17 +8,33 @@ def calculate_psi(expected, actual):
Args:
expected (np.array): Expected distribution (training data).
actual (np.array): Current distribution (new data).
bins (int): Number of bins for comparison.
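bins (int, optional): Number of bins for comparison. NOTE: the visible signature does not accept `bins`, and the code below does not use Doane's formula; it always builds 10 evenly spaced bin edges (9 bins) across the combined range of both datasets.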
Returns:
float: The PSI value.
"""
bins = 10
expected_hist, _ = np.histogram(expected, bins=bins)
actual_hist, _ = np.histogram(actual, bins=bins)
expected_perc = expected_hist / sum(expected_hist)
actual_perc = actual_hist / sum(actual_hist)
psi = np.sum((expected_perc - actual_perc) * np.log(expected_perc / actual_perc))
# Combine expected and actual datasets to determine bin edges
full_dataset = np.concatenate((expected, actual))
# Determine bin edges
bin_edges = np.linspace(min(min(expected), min(actual)), max(max(expected), max(actual)), 10)
# Calculate histograms for expected and actual distributions
expected_hist, _ = np.histogram(expected, bins=bin_edges)
actual_hist, _ = np.histogram(actual, bins=bin_edges)
# Convert counts to proportions
expected_proportions = expected_hist / np.sum(expected_hist)
actual_proportions = actual_hist / np.sum(actual_hist)
# Replace zero proportions to avoid division by zero or log of zero errors
expected_proportions = np.where(expected_proportions == 0, 1e-6, expected_proportions)
actual_proportions = np.where(actual_proportions == 0, 1e-6, actual_proportions)
# Calculate PSI
psi_values = (actual_proportions - expected_proportions) * np.log(actual_proportions / expected_proportions)
psi = np.sum(psi_values)
return psi
def log_psi(expected, actual):
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment