diff --git a/doc/whats_new/v0.5.rst b/doc/whats_new/v0.5.rst index 626614bda..3640ab610 100644 --- a/doc/whats_new/v0.5.rst +++ b/doc/whats_new/v0.5.rst @@ -6,6 +6,13 @@ Version 0.5 (under development) Changelog --------- +Documentation +............. + +- Correct the definition of the ratio when using a ``float`` in sampling + strategy for the over-sampling and under-sampling. + :issue:`525` by :user:`Ariel Rossanigo <arielrossanigo>`. + Maintenance ........... diff --git a/examples/plot_sampling_strategy_usage.py b/examples/plot_sampling_strategy_usage.py index 315328b78..917a19bf6 100644 --- a/examples/plot_sampling_strategy_usage.py +++ b/examples/plot_sampling_strategy_usage.py @@ -96,7 +96,7 @@ def my_autopct(pct): ############################################################################### # For **over-sampling methods**, it correspond to the ratio -# :math:`\\alpha_{os}` defined by :math:`N_{rm} = \\alpha_{os} \\times N_{m}` +# :math:`\\alpha_{os}` defined by :math:`N_{rm} = \\alpha_{os} \\times N_{M}` # where :math:`N_{rm}` and :math:`N_{M}` are the number of samples in the # minority class after resampling and the number of samples in the majority # class, respectively. diff --git a/imblearn/over_sampling/base.py b/imblearn/over_sampling/base.py index 20f02bd4f..0d98e9a41 100644 --- a/imblearn/over_sampling/base.py +++ b/imblearn/over_sampling/base.py @@ -22,12 +22,11 @@ class BaseOverSampler(BaseSampler): Sampling information to resample the data set. - When ``float``, it corresponds to the desired ratio of the number of - samples in the majority class over the number of samples in the - minority class after resampling. Therefore, the ratio is expressed as - :math:`\\alpha_{os} = N_{M} / N_{rm}` where :math:`N_{rm}` and - :math:`N_{M}` are the number of samples in the minority class after - resampling and the number of samples in the majority class, - respectively. + samples in the minority class over the number of samples in the + majority class after resampling. 
Therefore, the ratio is expressed as + :math:`\\alpha_{os} = N_{rm} / N_{M}` where :math:`N_{rm}` is the + number of samples in the minority class after resampling and + :math:`N_{M}` is the number of samples in the majority class. .. warning:: ``float`` is only available for **binary** classification. An diff --git a/imblearn/under_sampling/base.py b/imblearn/under_sampling/base.py index e1fa377d9..a17a4b68f 100644 --- a/imblearn/under_sampling/base.py +++ b/imblearn/under_sampling/base.py @@ -20,12 +20,12 @@ class BaseUnderSampler(BaseSampler): Sampling information to sample the data set. - When ``float``, it corresponds to the desired ratio of the number of - samples in the majority class over the number of samples in the - minority class after resampling. Therefore, the ratio is expressed as - :math:`\\alpha_{us} = N_{rM} / N_{m}` where :math:`N_{rM}` and - :math:`N_{m}` are the number of samples in the majority class after - resampling and the number of samples in the minority class, - respectively. + samples in the minority class over the number of samples in the + majority class after resampling. Therefore, the ratio is expressed as + :math:`\\alpha_{us} = N_{m} / N_{rM}` where :math:`N_{m}` is the + number of samples in the minority class and + :math:`N_{rM}` is the number of samples in the majority class + after resampling. .. warning:: ``float`` is only available for **binary** classification. An