
Commit

update
wangronin committed Dec 19, 2023
1 parent 9dcad99 commit 8be71bf
Showing 10 changed files with 177 additions and 127 deletions.
34 changes: 27 additions & 7 deletions hvd/newton.py
@@ -493,12 +493,18 @@ def __init__(
self.n_eq = n_eq
self.n_ieq = n_ieq
self.func = func
self.jac = jac
self._jac = jac
self.h = h
self.h_jac = h_jac
self.g = g
self.g_jac = g_jac
self._constrained = self.g is not None or self.h is not None
self.n_jac_evals = 0

def jac(self, x: np.ndarray) -> np.ndarray:
"""Jacobian of the objective function"""
self.n_jac_evals += 1
return self._jac(x)

def update(self, X: np.ndarray, compute_gradient: bool = True):
self.X = X
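
The change above routes all Jacobian evaluations through a counting wrapper so the benchmark scripts can report the number of calls. A minimal standalone sketch of the same pattern (the class and names here are illustrative, not from the repo):

import numpy as np

class CountingJacobian:
    """Wrap a Jacobian callable and count how often it is evaluated."""

    def __init__(self, jac):
        self._jac = jac
        self.n_jac_evals = 0

    def __call__(self, x: np.ndarray) -> np.ndarray:
        self.n_jac_evals += 1
        return self._jac(x)

jac = CountingJacobian(lambda x: 2.0 * x)  # e.g., gradient of ||x||^2
jac(np.ones(3))
print(jac.n_jac_evals)  # 1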
@@ -850,11 +856,11 @@ def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]:
-1 * np.linalg.lstsq(DR, R_list[r].reshape(-1, 1), rcond=None)[0].ravel()
)
# heuristic: prevent the step length from becoming too large
X = newton_step[:, : self.dim_p]
t = np.max(self.xu - self.xl) / 4
idx = np.linalg.norm(X, axis=1) >= t
X[idx] /= np.linalg.norm(X[idx], axis=1).reshape(-1, 1) / t
newton_step[:, : self.dim_p] = X
# X = newton_step[:, : self.dim_p]
# t = np.max(self.xu - self.xl) / 4
# idx = np.linalg.norm(X, axis=1) >= t
# X[idx] /= np.linalg.norm(X[idx], axis=1).reshape(-1, 1) / t
# newton_step[:, : self.dim_p] = X
return newton_step, R
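
The commented-out heuristic above clipped each Newton step in decision space to at most a quarter of the largest box width, rescaling any row whose norm exceeds that threshold. A self-contained sketch of the rule as it stood before being disabled (shapes assumed: `steps` is (N, dim_p); `xl`, `xu` are the box-bound vectors):

import numpy as np

def clip_step_lengths(steps: np.ndarray, xl: np.ndarray, xu: np.ndarray) -> np.ndarray:
    """Rescale rows of `steps` whose Euclidean norm exceeds max(xu - xl) / 4."""
    t = np.max(xu - xl) / 4
    norms = np.linalg.norm(steps, axis=1)
    out = steps.copy()
    idx = norms >= t
    out[idx] *= (t / norms[idx]).reshape(-1, 1)  # shrink long steps to norm t
    return out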

def _shift_reference_set(self):
@@ -887,7 +893,21 @@ def _shift_reference_set():
for i, k in enumerate(indices):
n = self._eta[self.Y_label[k]]
# NOTE: a larger initial shift is needed for ZDT6
v = 0.05 * n if self.iter_count > 0 else 0.06 * n # the initial shift is a bit larger
if 11 < 2:  # always False: the geometric shift below is disabled
if self.iter_count == 0:
m = self.active_indicator._medoids[k]
a = n[1] / n[0]
b = -1
c = m[1] - (n[1] * m[0]) / n[0]
idx = np.argmin(
[abs(a * p[0] + b * p[1] + c) / np.sqrt(a**2 + b**2) for p in self._pareto_front]
)
eps = np.inner(self._pareto_front[idx] - m, n) * 1.03
v = eps * n
else:
v = 0.05 * n
else:
v = 0.05 * n if self.iter_count > 0 else 0.01 * n  # the initial shift uses a smaller factor
self.active_indicator.shift_medoids(v, k)

if self.iter_count == 0: # record the initial medoids
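For context, the disabled `if 11 < 2:` branch implements a geometric first-iteration shift: it takes the line through the medoid m along the shift direction n in the two-dimensional objective space, finds the Pareto-front point closest to that line via the point-to-line distance |a*p1 + b*p2 + c| / sqrt(a^2 + b^2), and scales n so the medoid lands 3% beyond that point. A standalone sketch under those assumptions (2-D objectives; `pareto_front` is an (N, 2) array):

import numpy as np

def initial_shift(m: np.ndarray, n: np.ndarray, pareto_front: np.ndarray) -> np.ndarray:
    """Shift vector moving medoid `m` along `n` to slightly past the Pareto front."""
    # line through m with slope n[1]/n[0], written as a*y1 + b*y2 + c = 0
    a, b = n[1] / n[0], -1.0
    c = m[1] - n[1] * m[0] / n[0]
    dist = np.abs(pareto_front @ np.array([a, b]) + c) / np.hypot(a, b)
    nearest = pareto_front[np.argmin(dist)]
    eps = np.inner(nearest - m, n) * 1.03  # overshoot the front by 3%
    return eps * n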
11 changes: 6 additions & 5 deletions hvd/problems/base.py
@@ -75,6 +75,7 @@ def ieq_constraint_hessian(self, x: np.ndarray) -> np.ndarray:
return np.array(self._ieq_constraint_hessian(x))


# TODO: unify this class with the `ConstrainedMOOAnalytical`
class PymooProblemWithAD:
def __init__(self, problem: Problem) -> None:
self._problem = problem
@@ -84,15 +85,15 @@ def __init__(self, problem: Problem) -> None:
self.n_ieq_constr = self._problem.n_ieq_constr + 2 * self.n_var
self.xl = self._problem.xl
self.xu = self._problem.xu
obj_func = partial(problem.__class__._evaluate, problem)
ieq_func = partial(PymooProblemWithAD.ieq_constraint, self)
self._objective_jacobian = jit(jacrev(obj_func))
self._objective_hessian = jit(hessian(obj_func))
self._obj_func = jit(partial(problem.__class__._evaluate, problem))
ieq_func = jit(partial(PymooProblemWithAD.ieq_constraint, self))
self._objective_jacobian = jit(jacrev(self._obj_func))
self._objective_hessian = jit(hessian(self._obj_func))
self._ieq_jacobian = jit(jacfwd(ieq_func))
self.CPU_time: int = 0 # measured in nanoseconds

def objective(self, x: jnp.ndarray) -> jnp.ndarray:
return self._problem._evaluate(x)
return self._obj_func(x)

def ieq_constraint(self, x: jnp.ndarray) -> jnp.ndarray:
# box constraints are converted to inequality constraints
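The rewiring above jits the raw `_evaluate` once and derives both the Jacobian and the Hessian from the same jitted callable, so `objective` and its derivatives share one traced function. A minimal sketch of the jax pattern on a toy bi-objective (the objective itself is illustrative):

import jax.numpy as jnp
from jax import hessian, jacrev, jit

def evaluate(x: jnp.ndarray) -> jnp.ndarray:
    # toy bi-objective: f1 = ||x||^2, f2 = ||x - 1||^2
    return jnp.array([jnp.sum(x**2), jnp.sum((x - 1.0) ** 2)])

obj = jit(evaluate)
obj_jac = jit(jacrev(obj))    # (n_obj, n_var) Jacobian
obj_hess = jit(hessian(obj))  # (n_obj, n_var, n_var) Hessian

x = jnp.ones(3)
print(obj(x), obj_jac(x).shape, obj_hess(x).shape)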
3 changes: 2 additions & 1 deletion hvd/utils.py
@@ -7,6 +7,7 @@
from typing import Any, Callable, Dict, Iterable, List, Sequence, Tuple, Union

import numpy as np
from jax import jit
from scipy.linalg import cholesky, qr
from sklearn.neighbors import LocalOutlierFactor

@@ -192,7 +193,7 @@ def __func__(ref, *arg, **kwargv):


# TODO: this is too slow, improve the algorithm
# @jit(nopython=True, error_model="numpy", cache=True)
@jit
def get_non_dominated(pareto_front: np.ndarray, return_index: bool = False, weakly_dominated: bool = True):
"""Find pareto front (undominated part) of the input performance data.
Minimization is assumed
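A caveat with swapping numba's `@jit(nopython=True, ...)` for jax's `@jit` here: jax traces Python-level control flow, so branching on the boolean arguments `return_index` and `weakly_dominated` inside the function would fail unless those arguments are marked static (whether the body actually branches on them is an assumption, but the signature suggests it); the body would also need `jnp` operations rather than `np` ones to be traceable. A small runnable illustration of the usual fix:

from functools import partial

import jax.numpy as jnp
from jax import jit

@partial(jit, static_argnames=("return_index",))
def top_value(x: jnp.ndarray, return_index: bool = False):
    if return_index:  # a Python branch on a static argument is fine under jit
        return jnp.argmax(x)
    return jnp.max(x)

print(top_value(jnp.array([1.0, 3.0, 2.0])))                     # 3.0
print(top_value(jnp.array([1.0, 3.0, 2.0]), return_index=True))  # 1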
4 changes: 2 additions & 2 deletions scripts/benchmark_DTLZ.py
@@ -42,7 +42,7 @@

gen = 100
path = f"./DTLZ-gen{gen}/"
emoa = "NSGA-II"
emoa = "NSGA-III"


def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
@@ -210,7 +210,7 @@ def execute(run: int):
if gen == 110 and problem_name == "DTLZ4":
run_id = list(set(run_id) - set([14]))

if 11 < 2:
if 1 < 2:  # always True: run serially
for i in run_id:
execute(i)
else:
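The `if 11 < 2:` / `if 1 < 2:` literals act as hand-edited switches between the serial loop (easier to debug) and the joblib-parallel path; flipping `11` to `1` above enables serial execution. A more explicit spelling of the same toggle (the stand-in `execute` is illustrative):

from joblib import Parallel, delayed

def execute(run: int) -> int:
    return run * run  # stand-in for the real benchmark run

run_id = list(range(5))
RUN_SERIAL = True  # flip to False for the parallel path

if RUN_SERIAL:
    data = [execute(i) for i in run_id]
else:
    data = Parallel(n_jobs=len(run_id))(delayed(execute)(run=i) for i in run_id)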
32 changes: 18 additions & 14 deletions scripts/benchmark_EA.py
@@ -17,11 +17,8 @@
from pymoo.termination import get_termination
from pymoo.util.ref_dirs import get_reference_directions
from pymoo.util.reference_direction import UniformReferenceDirectionFactory
from scipy.io import savemat

from hvd.delta_p import GenerationalDistance, InvertedGenerationalDistance

# from hvd.hypervolume import hypervolume
from hvd.problems.base import MOOAnalytical

# ref_point = np.array([11, 11])
@@ -113,12 +110,13 @@ def minimize(
gd_value = GenerationalDistance(pareto_front).compute(Y=res.F)
igd_value = InvertedGenerationalDistance(pareto_front).compute(Y=res.F)
# return np.array([igd_value, gd_value, hypervolume(res.F, ref_point)])
# return np.array([igd_value, gd_value, np.sum(np.bitwise_or(res.X < 0, res.X > 1))])
return np.array([igd_value, gd_value])


def get_algorithm(n_objective: int, algorithm_name: str, constrained: bool) -> GeneticAlgorithm:
pop_size = 100 if n_objective == 2 else 300

def get_algorithm(
n_objective: int, algorithm_name: str, pop_size: int, constrained: bool
) -> GeneticAlgorithm:
if algorithm_name == "NSGA-II":
algorithm = NSGA2(pop_size=pop_size)
elif algorithm_name == "NSGA-III":
@@ -146,22 +144,28 @@ def get_algorithm(n_objective: int, algorithm_name: str, constrained: bool) -> GeneticAlgorithm:
return algorithm


gen = 110
n_gen = 1510 if gen == 110 else 100
gen_func = lambda n_var, n_obj: 36 * n_obj + 10 * n_obj * n_var
def get_Jacobian_calls(path, problem_name, algorithm_name, gen):
return int(np.median(pd.read_csv(f"{path}/{problem_name}-DpN-{algorithm_name}-{gen}.csv").Jac_calls))


n_iter_newton = 5
gen = 300
gen_func = lambda n_var, scale: 4 * scale + 10 * n_var
N = 30
problem = sys.argv[1]
for problem_name in [
problem,
]:

for problem_name in [problem]:
print(problem_name)
problem = get_problem(problem_name)
pop_size = 100 if problem.n_obj == 2 else 300
constrained = problem.n_eq_constr > 0 or problem.n_ieq_constr > 0
termination = get_termination("n_gen", n_gen + 3 * gen_func(problem.n_var, problem.n_obj))

for algorithm_name in ("NSGA-II",):
algorithm = get_algorithm(problem.n_obj, algorithm_name, constrained)
scale = int(
get_Jacobian_calls("./results", problem_name, algorithm_name, gen) / pop_size / n_iter_newton
)
termination = get_termination("n_gen", gen + n_iter_newton * gen_func(problem.n_var, scale))
algorithm = get_algorithm(problem.n_obj, algorithm_name, pop_size, constrained)
data = Parallel(n_jobs=N)(
delayed(minimize)(problem, algorithm, termination, run_id=i + 1, seed=i + 1, verbose=False)
for i in range(N)
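The budget matching above grants the MOEA extra generations equivalent to the Newton method's Jacobian usage: `scale` is the median number of Jacobian calls per individual per Newton iteration (read from the `Jac_calls` column the ZDT benchmark writes), and `gen_func` converts that into generations per Newton iteration. A worked sketch of the arithmetic with made-up numbers:

import numpy as np

pop_size, n_iter_newton, gen, n_var = 100, 5, 300, 30
jac_calls = np.median([150_000, 160_000, 155_000])  # hypothetical per-run totals
scale = int(jac_calls / pop_size / n_iter_newton)   # 310 Jacobian calls per individual per iteration
extra_per_iter = 4 * scale + 10 * n_var             # gen_func(n_var, scale) = 1540
total_gen = gen + n_iter_newton * extra_per_iter    # 300 + 5 * 1540 = 8000
print(scale, total_gen)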
50 changes: 28 additions & 22 deletions scripts/benchmark_ZDT.py
@@ -40,9 +40,10 @@
problem = PymooProblemWithAD(f)
pareto_front = problem.get_pareto_front(1000)

path = "./Gen1510/"
# path = "./Gen1510/"
path = "ZDT_gen_300"
emoa = "NSGA-II"
gen = 100
gen = 300


def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
@@ -61,23 +62,24 @@ def plot(y0, Y, ref, hist_Y, history_medoids, hist_IGD, hist_R_norm, fig_name):
handle.set_markersize(10)

N = len(y0)
trajectory = np.array([y0] + hist_Y)
for i in range(N):
x, y = trajectory[:, i, 0], trajectory[:, i, 1]
ax1.quiver(
x[:-1],
y[:-1],
x[1:] - x[:-1],
y[1:] - y[:-1],
scale_units="xy",
angles="xy",
scale=1,
color="k",
width=0.003,
alpha=0.5,
headlength=4.5,
headwidth=2.5,
)
if 1 < 2:  # always True: keep the trajectory quiver plot enabled
trajectory = np.array([y0] + hist_Y)
for i in range(N):
x, y = trajectory[:, i, 0], trajectory[:, i, 1]
ax1.quiver(
x[:-1],
y[:-1],
x[1:] - x[:-1],
y[1:] - y[:-1],
scale_units="xy",
angles="xy",
scale=1,
color="k",
width=0.003,
alpha=0.5,
headlength=4.5,
headwidth=2.5,
)

lines = []
lines += ax1.plot(pareto_front[:, 0], pareto_front[:, 1], "g.", mec="none", ms=5, alpha=0.3)
@@ -156,6 +158,7 @@ def execute(run: int):
x0 = x0[idx]
y0 = y0[idx]
Y_label = Y_label[idx]
y0 = np.array([problem.objective(_) for _ in x0])
# if the number of clusters of `Y` is more than that of the reference set
if len(np.unique(Y_label)) > len(ref):
ref = np.vstack([r for r in ref.values()])
@@ -186,11 +189,14 @@ def execute(run: int):
X, Y, _ = opt.run()
# fig_name = f"./figure/{problem_name}_DpN_{emoa}_run{run}_{gen}.pdf"
fig_name = f"{problem_name}_DpN_{emoa}_run{run}_{gen}.pdf"
# plot(y0, Y, all_ref, opt.hist_Y, opt.history_medoids, opt.hist_IGD, opt.hist_R_norm, fig_name)
plot(y0, Y, all_ref, opt.hist_Y, opt.history_medoids, opt.hist_IGD, opt.hist_R_norm, fig_name)
gd_value = GenerationalDistance(pareto_front).compute(Y=Y)
igd_value = InvertedGenerationalDistance(pareto_front).compute(Y=Y)
data = np.concatenate([np.c_[[0] * len(x0), y0], np.c_[[max_iters] * len(x0), opt.hist_Y[-1]]], axis=0)
df = pd.DataFrame(data, columns=["iteration", "f1", "f2"])
df.to_csv(f"tmp/{problem_name}_DpN_{emoa}_run{run}_{gen}.csv")
# return np.array([igd_value, gd_value, hypervolume(Y, ref_point)])
return np.array([igd_value, gd_value])
return np.array([igd_value, gd_value, opt.state.n_jac_evals])


# get all run IDs
@@ -203,6 +209,6 @@ def execute(run: int):
execute(i)
else:
data = Parallel(n_jobs=n_jobs)(delayed(execute)(run=i) for i in run_id)
df = pd.DataFrame(np.array(data), columns=["IGD", "GD"])
df = pd.DataFrame(np.array(data), columns=["IGD", "GD", "Jac_calls"])
# df = pd.DataFrame(np.array(data), columns=["IGD", "GD", "HV"])
df.to_csv(f"{problem_name}-DpN-{emoa}-{gen}.csv", index=False)
91 changes: 57 additions & 34 deletions scripts/compute_statistics.py
@@ -3,43 +3,66 @@
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

gen = 110
emoa = "NSGA-II"
# out = []
# dfs = []
gen = 300
pvalues = []
stats = []
func = lambda x: f"{np.median(x):.4e} +/- {np.std(x) / np.sqrt(len(x)):.4e}"
stats_func = lambda x: [np.median(x), np.quantile(x, 0.9) - np.quantile(x, 0.1)]
MOEAs = [
"NSGA-II",
# "NSGA-III",
]
problems = [
# "ZDT1",
# "ZDT2",
# "ZDT3",
"ZDT4",
# "ZDT6",
# "DTLZ1",
# "DTLZ2",
# "DTLZ3",
# "DTLZ4",
# "DTLZ5",
# "DTLZ6",
# "DTLZ7",
]

# problems = ["DTLZ1", "DTLZ2", "DTLZ3", "DTLZ4", "DTLZ5", "DTLZ6", "DTLZ7"]
problems = ["ZDT1", "ZDT2", "ZDT3", "ZDT4", "ZDT6"]
for problem in problems:
data1 = pd.read_csv(f"./results/{problem}-DpN-{emoa}-{gen}.csv")
data2 = pd.read_csv(f"./results/{problem}-{emoa}-{gen}.csv")
x, y = data1.IGD.values, data2.IGD.values
# df1 = data1.loc[:, ["IGD"]]
# df1.insert(0, "dataset_name", problem)
# df1.insert(0, "classifier_name", "DpN")
# df2 = data2.loc[:, ["IGD"]]
# df2.insert(0, "dataset_name", problem)
# df2.insert(0, "classifier_name", emoa)
# dfs.append(pd.concat([df1, df2], axis=0))
for moea in MOEAs:
for problem in problems:
data1 = pd.read_csv(f"./results/{problem}-DpN-{moea}-{gen}.csv")
data2 = pd.read_csv(f"./results/{problem}-{moea}-{gen}.csv")
x, y = data1.IGD.values, data2.IGD.values
pvalue = mannwhitneyu(x=x, y=y, alternative="two-sided").pvalue
pvalues.append(pvalue)
stats.append([stats_func(x), stats_func(y)])

pvalue = mannwhitneyu(x=x, y=y, alternative="less").pvalue
pvalues.append(pvalue)
stats.append([func(x), func(y)])
# df1 = data1.apply(func, axis=0)
# df2 = data2.apply(lambda x: f"{np.median(x):.4e} +/- {np.std(x) / np.sqrt(len(x)):.4e}", axis=0)

# df = pd.concat(dfs, axis=0)
# df.rename(columns={"IGD": "accuracy"}, inplace=True)
# df.to_csv("example.csv", index=False)
reject, pvals_corrected, _, _ = multipletests(pvalues, alpha=0.05)
res = np.c_[np.vstack([np.array(stats).T, [f"{p:.4e}" for p in pvals_corrected]]), ["", "", sum(reject)]]
df = pd.DataFrame(
res,
columns=problems + ["Total"],
index=["Newton", emoa, "p-value"],
win, tie, lose = 0, 0, 0
for k, (n, m) in enumerate(stats):
x, y = n[0], m[0]
if reject[k]:
if x < y:
win += 1
s = "$\\uparrow$"
else:
lose += 1
s = "$\\downarrow$"
else:
tie += 1
s = "$\\leftrightarrow$"
stats[k] = [f"{n[0]:.4f}({n[1]:.4e}){s}", f"{m[0]:.4f}({m[1]:.4e})"]

Method = np.repeat(MOEAs, len(problems)).reshape(-1, 1)
Problem = np.tile(problems, len(MOEAs)).reshape(-1, 1)
summary = ["+/$\\approx$/-", "", f"{win}/{tie}/{loose}", ""]
data = pd.DataFrame(
np.vstack([np.hstack([Method, Problem, stats]), summary]), columns=["Method", "Problem", "Newton", "MOEA"]
)
df.to_csv(f"Newton-{emoa}-ZDT-{gen}.csv")
df.to_latex(f"Newton-{emoa}-ZDT-{gen}.tex")
print(data)
caption = """Warmstarting the Newton method after 1\,510 iterations of the MOEA, we show the IGD values
(median and 10\\% - 90\\% quantile range) of the final Pareto fronts, averaged over
30 independent runs. The Newton method is executed for five iterations, and the corresponding MOEA terminates
with 4\,870 iterations on ZDTs and 3\,700 on DTLZs. Mann–Whitney U test (with 5\\% significance level) is
employed to compare the performance of the Newton method and the MOEA, where Holm-Sidak method is used to
adjust the $p$-value for multiple testing.
"""
data.to_latex(f"Newton-{gen}.tex", index=False, caption=caption)
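
Since the table's significance markers depend on the correction, note that `multipletests` defaults to method="hs", i.e., Holm-Sidak, matching the caption. A minimal self-contained example of the testing pipeline used above, on synthetic data:

import numpy as np
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import multipletests

rng = np.random.default_rng(0)
pvalues = [
    mannwhitneyu(rng.normal(0.0, 1, 30), rng.normal(shift, 1, 30), alternative="two-sided").pvalue
    for shift in (0.0, 0.5, 1.5)
]
reject, pvals_corrected, _, _ = multipletests(pvalues, alpha=0.05)  # Holm-Sidak by default
print(reject, np.round(pvals_corrected, 4))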