|
102 | 102 |
|
103 | 103 | print(f"{cv_results['test_score'].mean():.3f} +/- {cv_results['test_score'].std():.3f}")
|
104 | 104 |
|
| 105 | +# %% [markdown] |
| 106 | +# Roughly Balanced Bagging |
| 107 | +# ------------------------ |
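# Roughly Balanced Bagging [3]_ does not force every bootstrap sample to be
# exactly balanced. The minority class is resampled at its original size,
# while the number of majority-class samples is drawn from a negative binomial
# distribution parametrized by the minority-class count and a success
# probability of 0.5. The class ratio therefore varies from one bootstrap to
# the next while being balanced on average.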
| 109 | + |
| 110 | +# %% |
| 111 | +from collections import Counter |
| 112 | +import numpy as np |
| 113 | +from imblearn import FunctionSampler |
| 114 | + |
| 115 | + |
def binomial_resampling(X, y, replace=True):
    """Resample a binary dataset following Roughly Balanced Bagging.

    The least frequent class is resampled at its original size, while the
    number of samples drawn from the most frequent class follows a negative
    binomial distribution with ``n`` equal to the minority-class count and
    ``p=0.5``. The expected number of majority samples thus equals the number
    of minority samples, but the actual ratio varies from call to call.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to resample. Plain Python sequences are converted to NumPy
        arrays; objects exposing ``shape`` are indexed as-is.
    y : array-like of shape (n_samples,)
        Class labels. Only the most and the least frequent classes are kept;
        samples of any other class are never selected.
    replace : bool, default=True
        Whether each class is drawn with replacement. With ``replace=False``
        ``np.random.choice`` raises ``ValueError`` when the number of majority
        samples requested exceeds the number available.

    Returns
    -------
    X_res, y_res
        The resampled data: majority-class samples first, followed by the
        minority-class samples.

    Notes
    -----
    Randomness comes from the global ``np.random`` state; seed it with
    ``np.random.seed`` for reproducible results. The negative binomial draw
    may be 0, in which case the output contains only the minority class.
    """
    # Element-wise comparison and fancy indexing need array semantics; lists
    # and tuples would otherwise compare to a single bool. Inputs that already
    # expose ``shape`` (ndarray, sparse matrix, ...) are left untouched.
    if not hasattr(X, "shape"):
        X = np.asarray(X)
    if not hasattr(y, "shape"):
        y = np.asarray(y)

    class_counts = Counter(y)
    majority_class = max(class_counts, key=class_counts.get)
    minority_class = min(class_counts, key=class_counts.get)

    # Number of majority samples is random, with mean equal to the minority
    # count: negative_binomial(n, 0.5) has expectation n.
    n_minority_class = class_counts[minority_class]
    n_majority_resampled = np.random.negative_binomial(n_minority_class, 0.5)

    majority_indices = np.random.choice(
        np.flatnonzero(y == majority_class),
        size=n_majority_resampled,
        replace=replace,
    )
    minority_indices = np.random.choice(
        np.flatnonzero(y == minority_class),
        size=n_minority_class,
        replace=replace,
    )
    indices = np.hstack([majority_indices, minority_indices])

    return X[indices], y[indices]
| 138 | + |
| 139 | + |
# Roughly Balanced Bagging: wrap the custom resampling function in a
# FunctionSampler and hand it to the bagging ensemble, then cross-validate.
rbb_sampler = FunctionSampler(func=binomial_resampling)
rbb = BalancedBaggingClassifier(sampler=rbb_sampler)
cv_results = cross_validate(rbb, X, y, scoring="balanced_accuracy")

# Report mean and standard deviation of the balanced accuracy across folds.
print(f"{cv_results['test_score'].mean():.3f} +/- {cv_results['test_score'].std():.3f}")
| 145 | + |
| 146 | + |
105 | 147 | # %% [markdown]
|
106 | 148 | # .. topic:: References:
|
107 | 149 | #
|
|
111 | 153 | # .. [2] S. Wang, and X. Yao. "Diversity analysis on imbalanced data sets by
|
112 | 154 | # using ensemble models." 2009 IEEE symposium on computational
|
113 | 155 | # intelligence and data mining. IEEE, 2009.
|
| 156 | +# |
| 157 | +# .. [3] S. Hido, H. Kashima, and Y. Takahashi. "Roughly balanced bagging |
| 158 | +# for imbalanced data." Statistical Analysis and Data Mining: The ASA |
| 159 | +# Data Science Journal 2.5-6 (2009): 412-426. |
0 commit comments