
Commit

update
wangronin committed Dec 19, 2023
1 parent 9dcad99 commit 8be71bf
Showing 10 changed files with 177 additions and 127 deletions.
34 changes: 27 additions & 7 deletions hvd/newton.py
@@ -493,12 +493,18 @@ def __init__(
self.n_eq = n_eq
self.n_ieq = n_ieq
self.func = func
self.jac = jac
self._jac = jac
self.h = h
self.h_jac = h_jac
self.g = g
self.g_jac = g_jac
self._constrained = self.g is not None or self.h is not None
self.n_jac_evals = 0

def jac(self, x: np.ndarray) -> np.ndarray:
"""Jacobian of the objective function"""
self.n_jac_evals += 1
return self._jac(x)

def update(self, X: np.ndarray, compute_gradient: bool = True):
self.X = X
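
The change above routes all Jacobian evaluations through a counting wrapper so the benchmark scripts can report the number of calls. A minimal standalone sketch of the same pattern (the class and names here are illustrative, not from the repo):

import numpy as np

class CountingJacobian:
    """Wrap a Jacobian callable and count how often it is evaluated."""

    def __init__(self, jac):
        self._jac = jac
        self.n_jac_evals = 0

    def __call__(self, x: np.ndarray) -> np.ndarray:
        self.n_jac_evals += 1
        return self._jac(x)

jac = CountingJacobian(lambda x: 2.0 * x)  # e.g., gradient of ||x||^2
jac(np.ones(3))
print(jac.n_jac_evals)  # 1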
@@ -850,11 +856,11 @@ def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]:
-1 * np.linalg.lstsq(DR, R_list[r].reshape(-1, 1), rcond=None)[0].ravel()
)
# heuristic: prevent the step length from becoming too large
X = newton_step[:, : self.dim_p]
t = np.max(self.xu - self.xl) / 4
idx = np.linalg.norm(X, axis=1) >= t
X[idx] /= np.linalg.norm(X[idx], axis=1).reshape(-1, 1) / t
newton_step[:, : self.dim_p] = X
# X = newton_step[:, : self.dim_p]
# t = np.max(self.xu - self.xl) / 4
# idx = np.linalg.norm(X, axis=1) >= t
# X[idx] /= np.linalg.norm(X[idx], axis=1).reshape(-1, 1) / t
# newton_step[:, : self.dim_p] = X
return newton_step, R
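
The commented-out heuristic above clipped each Newton step in decision space to at most a quarter of the largest box width, rescaling any row whose norm exceeds that threshold. A self-contained sketch of the rule as it stood before being disabled (shapes assumed: `steps` is (N, dim_p); `xl`, `xu` are the box-bound vectors):

import numpy as np

def clip_step_lengths(steps: np.ndarray, xl: np.ndarray, xu: np.ndarray) -> np.ndarray:
    """Rescale rows of `steps` whose Euclidean norm exceeds max(xu - xl) / 4."""
    t = np.max(xu - xl) / 4
    norms = np.linalg.norm(steps, axis=1)
    out = steps.copy()
    idx = norms >= t
    out[idx] *= (t / norms[idx]).reshape(-1, 1)  # shrink long steps to norm t
    return out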

def _shift_reference_set(self):
@@ -887,7 +893,21 @@ def _shift_reference_set():
for i, k in enumerate(indices):
n = self._eta[self.Y_label[k]]
# NOTE: a larger initial shift is needed for ZDT6
v = 0.05 * n if self.iter_count > 0 else 0.06 * n # the initial shift is a bit larger
if 11 < 2:  # always False: the geometric shift below is disabled
if self.iter_count == 0:
m = self.active_indicator._medoids[k]
a = n[1] / n[0]
b = -1
c = m[1] - (n[1] * m[0]) / n[0]
idx = np.argmin(
[abs(a * p[0] + b * p[1] + c) / np.sqrt(a**2 + b**2) for p in self._pareto_front]
)
eps = np.inner(self._pareto_front[idx] - m, n) * 1.03
v = eps * n
else:
v = 0.05 * n
else:
v = 0.05 * n if self.iter_count > 0 else 0.01 * n  # the initial shift uses a smaller factor
self.active_indicator.shift_medoids(v, k)

if self.iter_count == 0: # record the initial medoids
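For context, the disabled `if 11 < 2:` branch implements a geometric first-iteration shift: it takes the line through the medoid m along the shift direction n in the two-dimensional objective space, finds the Pareto-front point closest to that line via the point-to-line distance |a*p1 + b*p2 + c| / sqrt(a^2 + b^2), and scales n so the medoid lands 3% beyond that point. A standalone sketch under those assumptions (2-D objectives; `pareto_front` is an (N, 2) array):

import numpy as np

def initial_shift(m: np.ndarray, n: np.ndarray, pareto_front: np.ndarray) -> np.ndarray:
    """Shift vector moving medoid `m` along `n` to slightly past the Pareto front."""
    # line through m with slope n[1]/n[0], written as a*y1 + b*y2 + c = 0
    a, b = n[1] / n[0], -1.0
    c = m[1] - n[1] * m[0] / n[0]
    dist = np.abs(pareto_front @ np.array([a, b]) + c) / np.hypot(a, b)
    nearest = pareto_front[np.argmin(dist)]
    eps = np.inner(nearest - m, n) * 1.03  # overshoot the front by 3%
    return eps * n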
11 changes: 6 additions & 5 deletions hvd/problems/base.py
@@ -75,6 +75,7 @@ def ieq_constraint_hessian(self, x: np.ndarray) -> np.ndarray:
return np.array(self._ieq_constraint_hessian(x))


# TODO: unify this class with the `ConstrainedMOOAnalytical`
class PymooProblemWithAD:
def __init__(self, problem: Problem) -> None:
self._problem = problem
@@ -84,15 +85,15 @@ def __init__(self, problem: Problem) -> None:
self.n_ieq_constr = self._problem.n_ieq_constr + 2 * self.n_var
self.xl = self._problem.xl
self.xu = self._problem.xu
obj_func = partial(problem.__class__._evaluate, problem)
ieq_func = partial(PymooProblemWithAD.ieq_constraint, self)
self._objective_jacobian = jit(jacrev(obj_func))
self._objective_hessian = jit(hessian(obj_func))
self._obj_func = jit(partial(problem.__class__._evaluate, problem))
ieq_func = jit(partial(PymooProblemWithAD.ieq_constraint, self))
self._objective_jacobian = jit(jacrev(self._obj_func))
self._objective_hessian = jit(hessian(self._obj_func))
self._ieq_jacobian = jit(jacfwd(ieq_func))
self.CPU_time: int = 0 # measured in nanoseconds

def objective(self, x: jnp.ndarray) -> jnp.ndarray:
return self._problem._evaluate(x)
return self._obj_func(x)

def ieq_constraint(self, x: jnp.ndarray) -> jnp.ndarray:
# box constraints are converted to inequality constraints
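The rewiring above jits the raw `_evaluate` once and derives both the Jacobian and the Hessian from the same jitted callable, so `objective` and its derivatives share one traced function. A minimal sketch of the jax pattern on a toy bi-objective (the objective itself is illustrative):

import jax.numpy as jnp
from jax import hessian, jacrev, jit

def evaluate(x: jnp.ndarray) -> jnp.ndarray:
    # toy bi-objective: f1 = ||x||^2, f2 = ||x - 1||^2
    return jnp.array([jnp.sum(x**2), jnp.sum((x - 1.0) ** 2)])

obj = jit(evaluate)
obj_jac = jit(jacrev(obj))    # (n_obj, n_var) Jacobian
obj_hess = jit(hessian(obj))  # (n_obj, n_var, n_var) Hessian

x = jnp.ones(3)
print(obj(x), obj_jac(x).shape, obj_hess(x).shape)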
3 changes: 2 additions & 1 deletion hvd/utils.py
@@ -7,6 +7,7 @@
from typing import Any, Callable, Dict, Iterable, List, Sequence, Tuple, Union

import numpy as np
from jax import jit
from scipy.linalg import cholesky, qr
from sklearn.neighbors import LocalOutlierFactor

@@ -192,7 +193,7 @@ def __func__(ref, *arg, **kwargv):


# TODO: this is too slow, improve the algorithm
# @jit(nopython=True, error_model="numpy", cache=True)
@jit
def get_non_dominated(pareto_front: np.ndarray, return_index: bool = False, weakly_dominated: bool = True):
"""Find pareto front (undominated part) of the input performance data.
Minimization is assumed
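A caveat with swapping numba's `@jit(nopython=True, ...)` for jax's `@jit` here: jax traces Python-level control flow, so branching on the boolean arguments `return_index` and `weakly_dominated` inside the function would fail unless those arguments are marked static (whether the body actually branches on them is an assumption, but the signature suggests it); the body would also need `jnp` operations rather than `np` ones to be traceable. A small runnable illustration of the usual fix:

from functools import partial

import jax.numpy as jnp
from jax import jit

@partial(jit, static_argnames=("return_index",))
def top_value(x: jnp.ndarray, return_index: bool = False):
    if return_index:  # a Python branch on a static argument is fine under jit
        return jnp.argmax(x)
    return jnp.max(x)

print(top_value(jnp.array([1.0, 3.0, 2.0])))                     # 3.0
print(top_value(jnp.array([1.0, 3.0, 2.0]), return_index=True))  # 1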
4 changes: 2 additions & 2 deletions scripts/benchmark_DTLZ.py
@@ -42,7 +42,7 @@

gen = 100
path = f"./DTLZ-gen{gen}/"
emoa = "NSGA-II"
emoa = "NSGA-III"


def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
@@ -210,7 +210,7 @@ def execute(run: int):
if gen == 110 and problem_name == "DTLZ4":
run_id = list(set(run_id) - set([14]))

if 11 < 2:
if 1 < 2:  # always True: run serially
for i in run_id:
execute(i)
else:
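The `if 11 < 2:` / `if 1 < 2:` literals act as hand-edited switches between the serial loop (easier to debug) and the joblib-parallel path; flipping `11` to `1` above enables serial execution. A more explicit spelling of the same toggle (the stand-in `execute` is illustrative):

from joblib import Parallel, delayed

def execute(run: int) -> int:
    return run * run  # stand-in for the real benchmark run

run_id = list(range(5))
RUN_SERIAL = True  # flip to False for the parallel path

if RUN_SERIAL:
    data = [execute(i) for i in run_id]
else:
    data = Parallel(n_jobs=len(run_id))(delayed(execute)(run=i) for i in run_id)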
32 changes: 18 additions & 14 deletions scripts/benchmark_EA.py
@@ -17,11 +17,8 @@
from pymoo.termination import get_termination
from pymoo.util.ref_dirs import get_reference_directions
from pymoo.util.reference_direction import UniformReferenceDirectionFactory
from scipy.io import savemat

from hvd.delta_p import GenerationalDistance, InvertedGenerationalDistance

# from hvd.hypervolume import hypervolume
from hvd.problems.base import MOOAnalytical

# ref_point = np.array([11, 11])
@@ -113,12 +110,13 @@ def minimize(
gd_value = GenerationalDistance(pareto_front).compute(Y=res.F)
igd_value = InvertedGenerationalDistance(pareto_front).compute(Y=res.F)
# return np.array([igd_value, gd_value, hypervolume(res.F, ref_point)])
# return np.array([igd_value, gd_value, np.sum(np.bitwise_or(res.X < 0, res.X > 1))])
return np.array([igd_value, gd_value])


def get_algorithm(n_objective: int, algorithm_name: str, constrained: bool) -> GeneticAlgorithm:
pop_size = 100 if n_objective == 2 else 300

def get_algorithm(
n_objective: int, algorithm_name: str, pop_size: int, constrained: bool
) -> GeneticAlgorithm:
if algorithm_name == "NSGA-II":
algorithm = NSGA2(pop_size=pop_size)
elif algorithm_name == "NSGA-III":
@@ -146,22 +144,28 @@ def get_algorithm(n_objective: int, algorithm_name: str, constrained: bool) -> GeneticAlgorithm:
return algorithm


gen = 110
n_gen = 1510 if gen == 110 else 100
gen_func = lambda n_var, n_obj: 36 * n_obj + 10 * n_obj * n_var
def get_Jacobian_calls(path, problem_name, algorithm_name, gen):
return int(np.median(pd.read_csv(f"{path}/{problem_name}-DpN-{algorithm_name}-{gen}.csv").Jac_calls))


n_iter_newton = 5
gen = 300
gen_func = lambda n_var, scale: 4 * scale + 10 * n_var
N = 30
problem = sys.argv[1]
for problem_name in [
problem,
]:

for problem_name in [problem]:
print(problem_name)
problem = get_problem(problem_name)
pop_size = 100 if problem.n_obj == 2 else 300
constrained = problem.n_eq_constr > 0 or problem.n_ieq_constr > 0
termination = get_termination("n_gen", n_gen + 3 * gen_func(problem.n_var, problem.n_obj))

for algorithm_name in ("NSGA-II",):
algorithm = get_algorithm(problem.n_obj, algorithm_name, constrained)
scale = int(
get_Jacobian_calls("./results", problem_name, algorithm_name, gen) / pop_size / n_iter_newton
)
termination = get_termination("n_gen", gen + n_iter_newton * gen_func(problem.n_var, scale))
algorithm = get_algorithm(problem.n_obj, algorithm_name, pop_size, constrained)
data = Parallel(n_jobs=N)(
delayed(minimize)(problem, algorithm, termination, run_id=i + 1, seed=i + 1, verbose=False)
for i in range(N)
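The budget matching above grants the MOEA extra generations equivalent to the Newton method's Jacobian usage: `scale` is the median number of Jacobian calls per individual per Newton iteration (read from the `Jac_calls` column the ZDT benchmark writes), and `gen_func` converts that into generations per Newton iteration. A worked sketch of the arithmetic with made-up numbers:

import numpy as np

pop_size, n_iter_newton, gen, n_var = 100, 5, 300, 30
jac_calls = np.median([150_000, 160_000, 155_000])  # hypothetical per-run totals
scale = int(jac_calls / pop_size / n_iter_newton)   # 310 Jacobian calls per individual per iteration
extra_per_iter = 4 * scale + 10 * n_var             # gen_func(n_var, scale) = 1540
total_gen = gen + n_iter_newton * extra_per_iter    # 300 + 5 * 1540 = 8000
print(scale, total_gen)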
50 changes: 28 additions & 22 deletions scripts/benchmark_ZDT.py
@@ -40,9 +40,10 @@
problem = PymooProblemWithAD(f)
pareto_front = problem.get_pareto_front(1000)

path = "./Gen1510/"
# path = "./Gen1510/"
path = "ZDT_gen_300"
emoa = "NSGA-II"
gen = 100
gen = 300


def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
@@ -61,23 +62,24 @@ def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
handle.set_markersize(10)

N = len(y0)
trajectory = np.array([y0] + hist_Y)
for i in range(N):
x, y = trajectory[:, i, 0], trajectory[:, i, 1]
ax1.quiver(
x[:-1],
y[:-1],
x[1:] - x[:-1],
y[1:] - y[:-1],
scale_units="xy",
angles="xy",
scale=1,
color="k",
width=0.003,
alpha=0.5,
headlength=4.5,
headwidth=2.5,
)
if 1 < 2:  # always True: keep the trajectory quiver plot enabled
trajectory = np.array([y0] + hist_Y)
for i in range(N):
x, y = trajectory[:, i, 0], trajectory[:, i, 1]
ax1.quiver(
x[:-1],
y[:-1],
x[1:] - x[:-1],
y[1:] - y[:-1],
scale_units="xy",
angles="xy",
scale=1,
color="k",
width=0.003,
alpha=0.5,
headlength=4.5,
headwidth=2.5,
)

lines = []
lines += ax1.plot(pareto_front[:, 0], pareto_front[:, 1], "g.", mec="none", ms=5, alpha=0.3)
@@ -156,6 +158,7 @@ def execute(run: int):
x0 = x0[idx]
y0 = y0[idx]
Y_label = Y_label[idx]
y0 = np.array([problem.objective(_) for _ in x0])
# if the number of clusters of `Y` is more than that of the reference set
if len(np.unique(Y_label)) > len(ref):
ref = np.vstack([r for r in ref.values()])
@@ -186,11 +189,14 @@ def execute(run: int):
X, Y, _ = opt.run()
# fig_name = f"./figure/{problem_name}_DpN_{emoa}_run{run}_{gen}.pdf"
fig_name = f"{problem_name}_DpN_{emoa}_run{run}_{gen}.pdf"
# plot(y0, Y, all_ref, opt.hist_Y, opt.history_medoids, opt.hist_IGD, opt.hist_R_norm, fig_name)
plot(y0, Y, all_ref, opt.hist_Y, opt.history_medoids, opt.hist_IGD, opt.hist_R_norm, fig_name)
gd_value = GenerationalDistance(pareto_front).compute(Y=Y)
igd_value = InvertedGenerationalDistance(pareto_front).compute(Y=Y)
data = np.concatenate([np.c_[[0] * len(x0), y0], np.c_[[max_iters] * len(x0), opt.hist_Y[-1]]], axis=0)
df = pd.DataFrame(data, columns=["iteration", "f1", "f2"])
df.to_csv(f"tmp/{problem_name}_DpN_{emoa}_run{run}_{gen}.csv")
# return np.array([igd_value, gd_value, hypervolume(Y, ref_point)])
return np.array([igd_value, gd_value])
return np.array([igd_value, gd_value, opt.state.n_jac_evals])


# get all run IDs
@@ -203,6 +209,6 @@ def execute(run: int):
execute(i)
else:
data = Parallel(n_jobs=n_jobs)(delayed(execute)(run=i) for i in run_id)
df = pd.DataFrame(np.array(data), columns=["IGD", "GD"])
df = pd.DataFrame(np.array(data), columns=["IGD", "GD", "Jac_calls"])
# df = pd.DataFrame(np.array(data), columns=["IGD", "GD", "HV"])
df.to_csv(f"{problem_name}-DpN-{emoa}-{gen}.csv", index=False)
91 changes: 57 additions & 34 deletions scripts/compute_statistics.py
@@ -3,43 +3,66 @@
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

gen = 110
emoa = "NSGA-II"
# out = []
# dfs = []
gen = 300
pvalues = []
stats = []
func = lambda x: f"{np.median(x):.4e} +/- {np.std(x) / np.sqrt(len(x)):.4e}"
stats_func = lambda x: [np.median(x), np.quantile(x, 0.9) - np.quantile(x, 0.1)]
MOEAs = [
"NSGA-II",
# "NSGA-III",
]
problems = [
# "ZDT1",
# "ZDT2",
# "ZDT3",
"ZDT4",
# "ZDT6",
# "DTLZ1",
# "DTLZ2",
# "DTLZ3",
# "DTLZ4",
# "DTLZ5",
# "DTLZ6",
# "DTLZ7",
]

# problems = ["DTLZ1", "DTLZ2", "DTLZ3", "DTLZ4", "DTLZ5", "DTLZ6", "DTLZ7"]
problems = ["ZDT1", "ZDT2", "ZDT3", "ZDT4", "ZDT6"]
for problem in problems:
data1 = pd.read_csv(f"./results/{problem}-DpN-{emoa}-{gen}.csv")
data2 = pd.read_csv(f"./results/{problem}-{emoa}-{gen}.csv")
x, y = data1.IGD.values, data2.IGD.values
# df1 = data1.loc[:, ["IGD"]]
# df1.insert(0, "dataset_name", problem)
# df1.insert(0, "classifier_name", "DpN")
# df2 = data2.loc[:, ["IGD"]]
# df2.insert(0, "dataset_name", problem)
# df2.insert(0, "classifier_name", emoa)
# dfs.append(pd.concat([df1, df2], axis=0))
for moea in MOEAs:
for problem in problems:
data1 = pd.read_csv(f"./results/{problem}-DpN-{moea}-{gen}.csv")
data2 = pd.read_csv(f"./results/{problem}-{moea}-{gen}.csv")
x, y = data1.IGD.values, data2.IGD.values
pvalue = mannwhitneyu(x=x, y=y, alternative="two-sided").pvalue
pvalues.append(pvalue)
stats.append([stats_func(x), stats_func(y)])

pvalue = mannwhitneyu(x=x, y=y, alternative="less").pvalue
pvalues.append(pvalue)
stats.append([func(x), func(y)])
# df1 = data1.apply(func, axis=0)
# df2 = data2.apply(lambda x: f"{np.median(x):.4e} +/- {np.std(x) / np.sqrt(len(x)):.4e}", axis=0)

# df = pd.concat(dfs, axis=0)
# df.rename(columns={"IGD": "accuracy"}, inplace=True)
# df.to_csv("example.csv", index=False)
reject, pvals_corrected, _, _ = multipletests(pvalues, alpha=0.05)
res = np.c_[np.vstack([np.array(stats).T, [f"{p:.4e}" for p in pvals_corrected]]), ["", "", sum(reject)]]
df = pd.DataFrame(
res,
columns=problems + ["Total"],
index=["Newton", emoa, "p-value"],
win, tie, lose = 0, 0, 0
for k, (n, m) in enumerate(stats):
x, y = n[0], m[0]
if reject[k]:
if x < y:
win += 1
s = "$\\uparrow$"
else:
lose += 1
s = "$\\downarrow$"
else:
tie += 1
s = "$\\leftrightarrow$"
stats[k] = [f"{n[0]:.4f}({n[1]:.4e}){s}", f"{m[0]:.4f}({m[1]:.4e})"]

Method = np.repeat(MOEAs, len(problems)).reshape(-1, 1)
Problem = np.tile(problems, len(MOEAs)).reshape(-1, 1)
summary = ["+/$\\approx$/-", "", f"{win}/{tie}/{loose}", ""]
data = pd.DataFrame(
np.vstack([np.hstack([Method, Problem, stats]), summary]), columns=["Method", "Problem", "Newton", "MOEA"]
)
df.to_csv(f"Newton-{emoa}-ZDT-{gen}.csv")
df.to_latex(f"Newton-{emoa}-ZDT-{gen}.tex")
print(data)
caption = """Warmstarting the Newton method after 1\,510 iterations of the MOEA, we show the IGD values
(median and 10\\% - 90\\% quantile range) of the final Pareto fronts, averaged over
30 independent runs. The Newton method is executed for five iterations, and the corresponding MOEA terminates
with 4\,870 iterations on ZDTs and 3\,700 on DTLZs. Mann–Whitney U test (with 5\\% significance level) is
employed to compare the performance of the Newton method and the MOEA, where Holm-Sidak method is used to
adjust the $p$-value for multiple testing.
"""
data.to_latex(f"Newton-{gen}.tex", index=False, caption=caption)
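
Since the table's significance markers depend on the correction, note that `multipletests` defaults to method="hs", i.e., Holm-Sidak, matching the caption. A minimal self-contained example of the testing pipeline used above, on synthetic data:

import numpy as np
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

rng = np.random.default_rng(0)
pvalues = [
    mannwhitneyu(rng.normal(0.0, 1, 30), rng.normal(shift, 1, 30), alternative="two-sided").pvalue
    for shift in (0.0, 0.5, 1.5)
]
reject, pvals_corrected, _, _ = multipletests(pvalues, alpha=0.05)  # Holm-Sidak by default
print(reject, np.round(pvals_corrected, 4))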