Commit 3fde74e

seems to work!
gykovacs committed Oct 11, 2024
1 parent 7d760dc commit 3fde74e
Showing 27 changed files with 4,843 additions and 3,562 deletions.
57 changes: 56 additions & 1 deletion mlscorecheck/auc/_acc_aggregated.py
@@ -11,14 +11,15 @@

from ._utils import prepare_intervals, translate_folding

from ._acc_single import acc_min, acc_max
from ._acc_single import acc_min, acc_max, acc_onmax
from ._auc_aggregated import R, check_cvxopt

__all__ = [
    "acc_min_aggregated",
    "acc_max_aggregated",
    "acc_rmin_aggregated",
    "acc_rmax_aggregated",
    "acc_onmax_aggregated",
    "acc_from_aggregated",
    "acc_lower_from_aggregated",
    "acc_upper_from_aggregated",
@@ -441,6 +442,60 @@ def acc_rmax_aggregated(
    return acc_rmax_solve(ps, ns, auc, return_solutions)


def acc_onmax_aggregated(
    auc: float, ps: np.array, ns: np.array, return_solutions: bool = False
):
    """
    The maximum mean accuracy based on one-node curves

    Args:
        auc (float): the average AUC
        ps (np.array): the numbers of positive samples
        ns (np.array): the numbers of negative samples
        return_solutions (bool): whether to return the solutions to the
                                underlying optimization problem

    Returns:
        float | (float, (np.array, np.array, np.array, np.array, np.array)):
        the mean accuracy, or additionally the fold-level AUC values, the
        vectors of ps and ns, and the lower and upper bounds

    Raises:
        ValueError: when auc < 0.5 or no optimal solution is found
    """

    if auc < 0.5:
        raise ValueError("auc too small (acc_onmax_aggregated)")

    ps = np.array(ps)
    ns = np.array(ns)

    k = len(ps)

    mins = np.array([min(p, n) for p, n in zip(ps, ns)])

    weights = mins / (ps + ns)

    lower_bounds = np.repeat(0.5, k)
    upper_bounds = np.repeat(1.0, k)

    sorting = np.argsort(weights)[::-1]

    ps = ps[sorting]
    ns = ns[sorting]

    aucs = R(auc, k, lower_bounds, upper_bounds)

    accs = np.array([acc_onmax(auc, p, n) for auc, p, n in zip(aucs, ps, ns)])

    results = float(np.mean(accs))

    if return_solutions:
        results = results, (aucs, ps, ns, lower_bounds, upper_bounds)

    return results


def acc_lower_from_aggregated(
    *,
    scores: dict,
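
For orientation, a minimal sketch of how the new aggregated helper could be called (the fold sizes and the AUC value below are made up for illustration; the import path simply mirrors the file shown above):

from mlscorecheck.auc._acc_aggregated import acc_onmax_aggregated

# two hypothetical folds with 40/60 and 60/40 positive/negative samples
ps = [40, 60]
ns = [60, 40]

# maximum mean accuracy achievable at an average AUC of 0.8 under the one-node curve model
acc = acc_onmax_aggregated(auc=0.8, ps=ps, ns=ns)

# with return_solutions=True the fold-level AUCs distributed by R, the sorted ps and ns,
# and the lower/upper bounds are returned alongside the mean accuracy
acc, (aucs, ps_sorted, ns_sorted, lbs, ubs) = acc_onmax_aggregated(
    auc=0.8, ps=ps, ns=ns, return_solutions=True
)
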
102 changes: 98 additions & 4 deletions mlscorecheck/auc/_acc_single.py
@@ -17,8 +17,13 @@
    "acc_min",
    "acc_rmin",
    "acc_max",
    "acc_max_grad",
    "acc_rmax",
    "acc_rmax_grad",
    "acc_onmax",
    "acc_onmax_grad",
    "macc_min",
    "macc_min_grad",
]


@@ -74,6 +79,21 @@ def acc_max(auc, p, n):
    return (auc * min(p, n) + max(p, n)) / (p + n)


def acc_max_grad(auc, p, n):
    """
    The gradient (with respect to AUC) of the maximum accuracy

    Args:
        auc (float): upper bound on AUC
        p (int): the number of positive test samples
        n (int): the number of negative test samples

    Returns:
        float: the gradient
    """
    return min(p, n) / (p + n)


def acc_rmax(auc, p, n):
    """
    The maximum accuracy on a regulated minimum curve given an AUC
@@ -94,6 +114,58 @@ def acc_rmax(auc, p, n):
    return (max(p, n) + min(p, n) * np.sqrt(2 * (auc - 0.5))) / (p + n)


def acc_rmax_grad(auc, p, n):
    """
    The gradient (with respect to AUC) of the regulated maximum accuracy

    Args:
        auc (float): upper bound on AUC
        p (int): the number of positive test samples
        n (int): the number of negative test samples

    Returns:
        float: the gradient
    """
    return np.sqrt(2) * min(p, n) / 2 / (np.sqrt(auc - 0.5) * (p + n))


def acc_onmax(auc, p, n):
    """
    The maximum accuracy on a one-node curve given an AUC

    Args:
        auc (float): upper bound on AUC
        p (int): the number of positive test samples
        n (int): the number of negative test samples

    Returns:
        float: the accuracy

    Raises:
        ValueError: when auc < 0.5
    """

    if auc < 0.5:
        raise ValueError("auc too small for acc_onmax")

    return (2 * auc * min(p, n) + max(p, n) - min(p, n)) / (p + n)


def acc_onmax_grad(auc, p, n):
    """
    The gradient (with respect to AUC) of the one-node maximum accuracy

    Args:
        auc (float): upper bound on AUC
        p (int): the number of positive test samples
        n (int): the number of negative test samples

    Returns:
        float: the gradient
    """
    return 2 * min(p, n) / (p + n)
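
Since acc_onmax is linear in the AUC, its gradient is constant; a small self-check with made-up numbers (the module path is the file shown above) confirms the closed form against a finite difference:

from mlscorecheck.auc._acc_single import acc_onmax, acc_onmax_grad

p, n, auc, h = 30, 70, 0.85, 1e-6

# finite-difference slope of the one-node maximum accuracy
numeric = (acc_onmax(auc + h, p, n) - acc_onmax(auc - h, p, n)) / (2 * h)
assert abs(numeric - acc_onmax_grad(auc, p, n)) < 1e-6

# boundary values: at auc=0.5 the bound degenerates to max(p, n)/(p + n), at auc=1.0 it reaches 1.0
assert acc_onmax(0.5, p, n) == max(p, n) / (p + n)
assert acc_onmax(1.0, p, n) == 1.0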


def macc_min(auc, p, n):
    """
    The minimum of the maximum accuracy
@@ -112,6 +184,24 @@ def macc_min(auc, p, n):
    return max(p, n) / (p + n)


def macc_min_grad(auc, p, n):
    """
    The gradient (with respect to AUC) of the minimum of the maximum accuracy

    Args:
        auc (float): lower bound on AUC
        p (int): the number of positive test samples
        n (int): the number of negative test samples

    Returns:
        float: the gradient
    """

    if auc >= 1 - min(p, n) / (2 * max(p, n)):
        return n * p / ((n + p) * np.sqrt(-2 * auc * n * p + 2 * n * p))

    return 0.0


def acc_lower_from(*, scores: dict, eps: float, p: int, n: int, lower: str = "min"):
    """
    This function applies the lower bound estimation schemes to estimate
@@ -157,7 +247,7 @@ def acc_upper_from(*, scores: dict, eps: float, p: int, n: int, upper: str = "ma
        eps (float): the numerical uncertainty
        p (int): the number of positive samples
        n (int): the number of negative samples
        upper (str): 'max'/'rmax' - the type of upper bound
        upper (str): 'max'/'rmax'/'onmax' - the type of upper bound

    Returns:
        float: the upper bound for the accuracy
@@ -176,6 +266,8 @@ def acc_upper_from(*, scores: dict, eps: float, p: int, n: int, upper: str = "ma
        upper0 = acc_max(intervals["auc"][1], p, n)
    elif upper == "rmax":
        upper0 = acc_rmax(intervals["auc"][1], p, n)
    elif upper == "onmax":
        upper0 = acc_onmax(intervals["auc"][1], p, n)
    else:
        raise ValueError(f"unsupported upper bound {upper}")

@@ -193,7 +285,7 @@ def acc_from(
        eps (float): the numerical uncertainty
        p (int): the number of positive samples
        n (int): the number of negative samples
        lower (str): 'min'/'rmin' - the type of lower bound
        upper (str): 'max'/'rmax'/'onmax' - the type of upper bound

    Returns:
@@ -253,7 +345,7 @@ def max_acc_upper_from(*, scores: dict, eps: float, p: int, n: int, upper: str =
        eps (float): the numerical uncertainty
        p (int): the number of positive samples
        n (int): the number of negative samples
        upper (str): 'max'/'rmax' - the type of upper bound
        upper (str): 'max'/'rmax'/'onmax' - the type of upper bound
Returns:
float: the upper bound for the maximum accuracy
Expand All @@ -272,6 +364,8 @@ def max_acc_upper_from(*, scores: dict, eps: float, p: int, n: int, upper: str =
upper0 = acc_max(intervals["auc"][1], p, n)
elif upper == "rmax":
upper0 = acc_rmax(intervals["auc"][1], p, n)
elif upper == "onmax":
upper0 = acc_onmax(intervals["auc"][1], p, n)
else:
raise ValueError(f"unsupported upper bound {upper}")

@@ -291,7 +385,7 @@ def max_acc_from(
p (int): the number of positive samples
n (int): the number of negative samples
lower (str): 'min'
upper (str): 'max'/'rmax' - the type of upper bound
upper (str): 'max'/'rmax'/'onmax' - the type of upper bound
Returns:
tuple(float, float): the interval for the maximum accuracy
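
The new 'onmax' option can be selected through the keyword interface in the same way as 'max' and 'rmax'; a sketch, assuming the scores dict only needs an 'auc' entry here (as the intervals["auc"] lookups above suggest) and using illustrative numbers:

from mlscorecheck.auc._acc_single import acc_upper_from

# upper bound on accuracy from an AUC of 0.9 +/- 1e-4, using the one-node curve model
upper = acc_upper_from(scores={"auc": 0.9}, eps=1e-4, p=50, n=150, upper="onmax")
# internally this evaluates acc_onmax(intervals["auc"][1], p, n), i.e. the bound
# at the upper end of the AUC interval
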
39 changes: 36 additions & 3 deletions mlscorecheck/auc/_auc_aggregated.py
@@ -22,6 +22,7 @@
__all__ = [
    "auc_min_aggregated",
    "auc_max_aggregated",
    "auc_onmin_aggregated",
    "auc_rmin_aggregated",
    "auc_maxa_evaluate",
    "auc_maxa_solve",
@@ -371,6 +372,36 @@ def auc_max_aggregated(
    return results


def auc_onmin_aggregated(
    fpr: float, tpr: float, k: int, return_solutions: bool = False
) -> float:
    """
    The average area under the one-node (onmin) curves at the average fpr, tpr

    Args:
        fpr (float): upper bound on the average false positive rate
        tpr (float): lower bound on the average true positive rate
        k (int): the number of test sets/folds
        return_solutions (bool): whether to return the solutions for the
                                underlying curves

    Returns:
        float | (float, (np.array, np.array, np.array, np.array)): the area, or
        the area together with the fpr and tpr solutions and the lower and
        upper bounds
    """

    results = float((1 - fpr + tpr) / 2.0)

    if return_solutions:
        results = results, (
            np.repeat(fpr, k),
            np.repeat(tpr, k),
            np.repeat(0.0, k),
            np.repeat(1.0, k),
        )

    return results
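
The onmin aggregated bound is simply the single-point ROC bound (1 - fpr + tpr) / 2 evaluated at the average rates; a tiny illustration with made-up numbers:

from mlscorecheck.auc._auc_aggregated import auc_onmin_aggregated

auc_lb = auc_onmin_aggregated(fpr=0.2, tpr=0.7, k=5)  # (1 - 0.2 + 0.7) / 2 = 0.75

# each of the k curves is assigned the same (fpr, tpr) point and the trivial [0, 1] bounds
auc_lb, (fprs, tprs, lbs, ubs) = auc_onmin_aggregated(
    fpr=0.2, tpr=0.7, k=5, return_solutions=True
)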


def auc_rmin_aggregated(
    fpr: float, tpr: float, k: int, return_solutions: bool = False
) -> float:
@@ -745,7 +776,7 @@ def check_applicability_lower_aggregated(intervals: dict, lower: str, ps: int, n
        ValueError: when the methods are not applicable with the
            specified scores
    """
    if lower in ["min", "rmin"]:
    if lower in ["min", "rmin", "onmin"]:
        if "fpr" not in intervals or "tpr" not in intervals:
            raise ValueError("fpr, tpr or their complements must be specified")
    if lower in ["amin", "armin"]:
@@ -805,7 +836,7 @@ def auc_lower_from_aggregated(
            ps and ns, contains the keys 'p', 'n', 'n_repeats',
            'n_folds', 'folding' (currently 'stratified_sklearn'
            supported for 'folding')
        lower (str): ('min'/'rmin'/'amin'/'armin') - the type of
        lower (str): ('min'/'rmin'/'amin'/'armin'/'onmin') - the type of
            estimation for the lower bound

    Returns:
@@ -833,6 +864,8 @@

    if lower == "min":
        lower0 = auc_min_aggregated(intervals["fpr"][1], intervals["tpr"][0], k)
    elif lower == "onmin":
        lower0 = auc_onmin_aggregated(intervals["fpr"][1], intervals["tpr"][0], k)
    elif lower == "rmin":
        lower0 = auc_rmin_aggregated(intervals["fpr"][0], intervals["tpr"][1], k)
    elif lower == "amin":
@@ -931,7 +964,7 @@ def auc_from_aggregated(
            ps and ns, contains the keys 'p', 'n', 'n_repeats',
            'n_folds', 'folding' (currently 'stratified_sklearn'
            supported for 'folding')
        lower (str): ('min'/'rmin'/'amin'/'armin') - the type of
        lower (str): ('min'/'rmin'/'amin'/'armin'/'onmin') - the type of
            estimation for the lower bound
        upper (str): ('max'/'maxa'/'amax') - the type of estimation for
            the upper bound
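
In isolation, the new 'onmin' branch of auc_lower_from_aggregated reduces to the call below (the interval values are hypothetical; fpr is taken at its upper end and tpr at its lower end, mirroring the branch shown above):

from mlscorecheck.auc._auc_aggregated import auc_onmin_aggregated

intervals = {"fpr": (0.18, 0.22), "tpr": (0.68, 0.72)}  # hypothetical score intervals
k = 5

lower0 = auc_onmin_aggregated(intervals["fpr"][1], intervals["tpr"][0], k)
# == (1 - 0.22 + 0.68) / 2 = 0.73, a conservative lower bound on the mean AUC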