From 25aa1d424ee39b127b13ff5c28c60006ee560e91 Mon Sep 17 00:00:00 2001 From: wangronin Date: Fri, 5 Jul 2024 15:40:05 +0200 Subject: [PATCH] update --- examples/HVN/convex_example.py | 7 +- examples/HV_Hessian/2D_example_Hessian.py | 6 +- examples/HV_Hessian/3D_example_Hessian.py | 6 +- examples/HV_Hessian/4D_example_Hessian.py | 2 +- examples/MMD/example_ZDT.py | 1 + hvd/base.py | 134 +++++--- hvd/hypervolume_derivatives.py | 174 +++++++---- hvd/mmd.py | 6 +- hvd/mmd_newton.py | 6 +- hvd/newton.py | 359 +++++++++++----------- tests/test_2D.py | 8 +- tests/test_3D.py | 20 +- tests/test_ND.py | 6 +- 13 files changed, 422 insertions(+), 313 deletions(-) diff --git a/examples/HVN/convex_example.py b/examples/HVN/convex_example.py index 17f0d29..7ddf31d 100644 --- a/examples/HVN/convex_example.py +++ b/examples/HVN/convex_example.py @@ -52,7 +52,7 @@ def g_Hessian(x): ref = np.array([1, 1]) max_iters = 10 -N = 10 +N = 15 x1 = np.random.rand(N) x2 = x1**2 - 2 * x1 + 1 x0 = np.c_[x1, x2] @@ -75,6 +75,7 @@ def g_Hessian(x): minimization=True, max_iters=max_iters, verbose=True, + preconditioning=True, ) X, Y, stop = opt.run() @@ -112,8 +113,8 @@ def g_Hessian(x): ax0.legend([r"$Y_{\text{final}}$", r"$Y_0$", "Pareto front"]) ax12 = ax1.twinx() -ax1.plot(range(1, len(opt.history_HV) + 1), opt.history_HV, "b-") -ax12.semilogy(range(1, len(opt.history_HV) + 1), opt.hist_R_norm, "g--") +ax1.plot(range(1, len(opt.history_indicator_value) + 1), opt.history_indicator_value, "b-") +ax12.semilogy(range(1, len(opt.history_R_norm) + 1), opt.history_R_norm, "g--") ax1.set_ylabel("HV", color="b") ax12.set_ylabel(r"$||G(\mathbf{X})||$", color="g") ax1.set_title("Performance") diff --git a/examples/HV_Hessian/2D_example_Hessian.py b/examples/HV_Hessian/2D_example_Hessian.py index 18710dd..f518805 100644 --- a/examples/HV_Hessian/2D_example_Hessian.py +++ b/examples/HV_Hessian/2D_example_Hessian.py @@ -21,8 +21,8 @@ def MOP1_Hessian(x): ref = np.array([20, 20]) hvh = HypervolumeDerivatives( - n_decision_var=2, - n_objective=2, + n_var=2, + n_obj=2, ref=ref, func=MOP1, jac=MOP1_Jacobian, @@ -33,7 +33,7 @@ def MOP1_Hessian(x): X = np.random.rand(10, 2) Y = np.array([MOP1(_) for _ in X]) idx = get_non_dominated(-1 * Y, return_index=True) - out = hvh.compute(X[idx, :]) + out = hvh._compute_hessian(X[idx, :]) HdY2, HdX2 = out["HVdY2"], out["HVdX2"] HdX = out["HVdX"] w, v = np.linalg.eigh(HdX2) diff --git a/examples/HV_Hessian/3D_example_Hessian.py b/examples/HV_Hessian/3D_example_Hessian.py index cf18f4d..cbd4248 100644 --- a/examples/HV_Hessian/3D_example_Hessian.py +++ b/examples/HV_Hessian/3D_example_Hessian.py @@ -16,14 +16,14 @@ ref = np.array([2, 2, 2]) hvh = HypervolumeDerivatives( - n_decision_var=dim, - n_objective=3, + n_var=dim, + n_obj=3, ref=ref, func=f.objective, jac=f.objective_jacobian, hessian=f.objective_hessian, ) - out = hvh.compute(X) + out = hvh._compute_hessian(X) # automatic differentiation is very slow; # used for verifying the result of the analytical computation AD = hvh.compute_automatic_differentiation(X) diff --git a/examples/HV_Hessian/4D_example_Hessian.py b/examples/HV_Hessian/4D_example_Hessian.py index 1317c67..f286c51 100644 --- a/examples/HV_Hessian/4D_example_Hessian.py +++ b/examples/HV_Hessian/4D_example_Hessian.py @@ -10,7 +10,7 @@ ref = np.array([17, 35, 7, 10]) hvh = HypervolumeDerivatives(4, 4, ref, minimization=True) X = np.array([(16, 23, 1, 8), (14, 32, 2, 5), (12, 27, 3, 1), (10, 21, 4, 9), (8, 33, 5, 3)]) -out = hvh.compute(X) +out = hvh._compute_hessian(X) assert 
np.all( out["HVdY2"] diff --git a/examples/MMD/example_ZDT.py b/examples/MMD/example_ZDT.py index 593df89..4448316 100644 --- a/examples/MMD/example_ZDT.py +++ b/examples/MMD/example_ZDT.py @@ -74,6 +74,7 @@ max_iters=max_iters, verbose=True, metrics=metrics, + preconditioning=True, ) X, Y, _ = opt.run() diff --git a/hvd/base.py b/hvd/base.py index d806919..370626e 100644 --- a/hvd/base.py +++ b/hvd/base.py @@ -1,6 +1,7 @@ -import numpy as np +from copy import deepcopy +from typing import Callable, Self, Tuple -from .utils import merge_lists +import numpy as np class State: @@ -9,24 +10,31 @@ def __init__( n_var: int, n_eq: int, n_ieq: int, - func: callable, - jac: callable, - h: callable = None, - h_jac: callable = None, - g: callable = None, - g_jac: callable = None, + func: Callable, + jac: Callable, + h: Callable = None, + h_jac: Callable = None, + h_hess: Callable = None, + g: Callable = None, + g_jac: Callable = None, + g_hess: Callable = None, ) -> None: self.n_var: int = n_var # the number of the primal variables self.n_eq: int = n_eq self.n_ieq: int = n_ieq - self.func: callable = func - self._jac: callable = jac - self.h: callable = h - self.h_jac: callable = h_jac - self.g: callable = g - self.g_jac: callable = g_jac + self.n_cstr: int = self.n_eq + self.n_ieq + self.func: Callable = func + self._jac: Callable = jac + self.h: Callable = h + self.h_jac: Callable = h_jac + self.h_hess: Callable = h_hess + self.g: Callable = g + self.g_jac: Callable = g_jac + self.g_hess: Callable = g_hess self._constrained: bool = self.g is not None or self.h is not None self.n_jac_evals: int = 0 + self.n_cstr_jac_evals: int = 0 + self.n_cstr_hess_evals: int = 0 @property def N(self): @@ -37,32 +45,67 @@ def jac(self, x: np.ndarray) -> np.ndarray: self.n_jac_evals += 1 return self._jac(x) + def evaluate_cstr(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + H = self.h(x) if self.h is not None else [] + G = self.g(x) if self.g is not None else [] + if isinstance(H, (int, float)): + H = np.array([H]) + if isinstance(G, (int, float)): + G = np.array([G]) + active_indices = [True] * self.n_eq + if self.g is not None: + active_indices += (G >= -1e-4).tolist() + return np.r_[H, G], np.array(active_indices) + + def evaluate_cstr_jac(self, x: np.ndarray) -> np.ndarray: + if self.g_jac is not None or self.h_jac is not None: + self.n_cstr_jac_evals += 1 + # TODO: only evaluate active inequality constraints + dH = self.h_jac(x).tolist() if self.h_jac is not None else [] + dG = self.g_jac(x).tolist() if self.g_jac is not None else [] + return np.array(dH + dG).reshape(self.n_cstr, self.n_var) + + def evaluate_cstr_hess(self, x: np.ndarray) -> np.ndarray: + if self.g_hess is not None or self.h_hess is not None: + self.n_cstr_hess_evals += 1 + # TODO: only evaluate active inequality constraints + ddH = self.h_hess(x).tolist() if self.h_hess is not None else [] + ddG = self.g_hess(x).tolist() if self.g_hess is not None else [] + return np.array(ddH + ddG).reshape(self.n_cstr, self.n_var, self.n_var) + def update(self, X: np.ndarray, compute_gradient: bool = True): self.X = X primal_vars = self.primal self.Y = np.array([self.func(x) for x in primal_vars]) self.J = np.array([self.jac(x) for x in primal_vars]) if compute_gradient else None - if self._constrained: - eq = self._evaluate_constraints(primal_vars, type="eq", compute_gradient=compute_gradient) - ieq = self._evaluate_constraints(primal_vars, type="ieq", compute_gradient=compute_gradient) - cstr_value, active_indices, dH = merge_lists(eq, ieq) - 
self.cstr_value = cstr_value - self.active_indices = active_indices - self.dH = dH + cstr_value, active_indices = list(zip(*[self.evaluate_cstr(x) for x in primal_vars])) + self.cstr_value = np.array(cstr_value) + self.active_indices = np.array(active_indices) + self.cstr_grad = np.array([self.evaluate_cstr_jac(x) for x in primal_vars]) + self.cstr_hess = np.array([self.evaluate_cstr_hess(x) for x in primal_vars]) def update_one(self, x: np.ndarray, k: int): + primal_vars = x[: self.n_var] self.X[k] = x - x = np.atleast_2d(x) - primal_vars = x[:, : self.n_var] - self.Y[k] = self.func(primal_vars[0]) - self.J[k] = self.jac(primal_vars[0]) - if self._constrained: - eq = self._evaluate_constraints(primal_vars, type="eq", compute_gradient=True) - ieq = self._evaluate_constraints(primal_vars, type="ieq", compute_gradient=True) - cstr_value, active_indices, dH = merge_lists(eq, ieq) + self.Y[k] = self.func(primal_vars) + self.J[k] = self.jac(primal_vars) + cstr_value, active_indices = self.evaluate_cstr(primal_vars) + cstr_grad = self.evaluate_cstr_jac(primal_vars) + cstr_hess = self.evaluate_cstr_hess(primal_vars) + if cstr_value is not None: self.cstr_value[k] = cstr_value self.active_indices[k] = active_indices - self.dH[k] = dH + if cstr_grad is not None: + self.cstr_grad[k] = cstr_grad + if cstr_hess is not None: + self.cstr_hess[k] = cstr_hess + + def is_feasible(self) -> np.ndarray: + # TODO: should the active ieq cstr be considered infeasible? + ieq_active_idx = self.active_indices[:, self.n_eq :] + eq_feasible = np.all(np.isclose(self.H, 0, atol=1e-4, rtol=0), axis=1) + ieq_feasible = ~np.any(ieq_active_idx, axis=1) + return np.bitwise_and(eq_feasible, ieq_feasible) @property def primal(self) -> np.ndarray: @@ -84,18 +127,19 @@ def G(self) -> np.ndarray: """Inequality constraint values""" return self.cstr_value[:, self.n_eq :] - def _evaluate_constraints( - self, - primal_vars: np.ndarray, - type: str = "eq", - compute_gradient: bool = True, - ): - N = len(primal_vars) - func = self.h if type == "eq" else self.g - jac = self.h_jac if type == "eq" else self.g_jac - value = np.array([func(x) for x in primal_vars]).reshape(N, -1) - active_indices = [[True] * self.n_eq] * N if type == "eq" else [v >= -1e-4 for v in value] - active_indices = np.array(active_indices) - if compute_gradient: - dH = np.array([jac(x).reshape(-1, self.n_var) for x in primal_vars]) - return (value, active_indices, dH) if compute_gradient else (value, active_indices) + def __getitem__(self, indices: np.ndarray) -> Self: + """Slicing the state class + + Args: + indices (np.ndarray): indices to select + """ + obj = deepcopy(self) + obj.n_jac_evals = 0 + obj.X = obj.X[indices] + obj.Y = obj.Y[indices] + obj.J = obj.J[indices] + if self._constrained: + obj.cstr_value = obj.cstr_value[indices] + obj.active_indices = obj.active_indices[indices] + obj.cstr_grad = obj.cstr_grad[indices] + return obj diff --git a/hvd/hypervolume_derivatives.py b/hvd/hypervolume_derivatives.py index e45c5e3..66b3404 100644 --- a/hvd/hypervolume_derivatives.py +++ b/hvd/hypervolume_derivatives.py @@ -1,6 +1,6 @@ from typing import Dict, List, Tuple, Union -# TODO: remove dependencies on `autograd` +# TODO: remove dependencies on `autograd` and move to jax import autograd.numpy as np import numpy as np from autograd import hessian, jacobian @@ -9,7 +9,7 @@ from .hypervolume import hypervolume from .utils import non_domin_sort -__author__ = ["Hao Wang"] +__author__ = "Hao Wang" def HVY(n_objective: int, ref: np.ndarray) -> callable: @@ -36,8 
+36,8 @@ class HypervolumeDerivatives: def __init__( self, - n_decision_var: int, - n_objective: int, + n_var: int, + n_obj: int, ref: Union[np.ndarray, List[List]], func: callable = None, jac: callable = None, @@ -73,8 +73,8 @@ def __init__( if hessian is None: hessian = lambda x: np.zeros((len(x), len(x), len(x))) - self.n_decision_var = int(n_decision_var) - self.n_objective = int(n_objective) + self.n_var = int(n_var) + self.n_obj = int(n_obj) self.func = func if minimization else lambda x: -1 * func(x) self.jac = jac if minimization else lambda x: -1 * jac(x) self.hessian = hessian if minimization else lambda x: -1 * hessian(x) @@ -103,7 +103,7 @@ def objective_points(self, points): if not isinstance(points, np.ndarray): points = np.asarray(points) self._objective_points = points - assert self.n_objective == self._objective_points.shape[1] + assert self.n_obj == self._objective_points.shape[1] if self._objective_points.shape[1] == 1: self._nondominated_indices = np.array([np.argmin(self._objective_points.ravel())]) @@ -114,45 +114,58 @@ def objective_points(self, points): def _check_X(self, X: np.ndarray): X = np.atleast_2d(X) - if X.shape[1] != self.n_decision_var: - X = X.reshape(-1, self.n_decision_var) + if X.shape[1] != self.n_var: + X = X.reshape(-1, self.n_var) return X def _check_Y(self, Y: np.ndarray): Y = np.atleast_2d(Y) - if Y.shape[1] != self.n_objective: - Y = Y.reshape(-1, self.n_decision_var) + if Y.shape[1] != self.n_obj: + Y = Y.reshape(-1, self.n_var) return Y - def HV(self, X: np.ndarray) -> float: - X = self._check_X(X) - Y = np.array([self.func(x) for x in X]) + def compute(self, X: np.ndarray = None, Y: np.ndarray = None) -> float: + if Y is None: + assert X is not None + assert self.func is not None + X = self._check_X(X) + Y = np.array([self.func(x) for x in X]) return hypervolume(Y, self.ref) - def project( - self, axis: int, i: int, pareto_front: np.ndarray = None, ref: np.ndarray = None - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """projecting the Pareto front along `axis` with respect to the i-th point""" - pareto_front = self.objective_points if pareto_front is None else pareto_front - ref = self.ref if ref is None else ref - y = pareto_front[i, :] - # projection: drop the `axis`-th dimension - y_ = np.delete(y, obj=axis) - ref_ = np.delete(ref, obj=axis) - idx = np.nonzero(pareto_front[:, axis] < y[axis])[0] - pareto_front_ = np.delete(pareto_front[idx, :], obj=axis, axis=1) - if len(pareto_front_) != 0: - if pareto_front_.shape[1] == 1: - pareto_indices = np.array([np.argmin(pareto_front_.ravel())]) - else: - pareto_indices = non_domin_sort(pareto_front_, only_front_indices=True)[0] - # NOTE: implement a fast algorithm to get the non dominated subset - # pareto_indices = get_non_dominated(pareto_front_, return_index=True) - pareto_front_ = pareto_front_[pareto_indices] - idx = idx[pareto_indices] - return y_, pareto_front_, ref_, idx + def compute_derivatives( + self, + X: np.ndarray, + Y: np.ndarray = None, + compute_hessian: bool = True, + YdX: np.ndarray = None, + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """compute the derivatives of the inverted generational distance^p + + Args: + X (np.ndarray): the decision points of shape (N, dim). + Y (np.ndarray, optional): the objective points of shape (N, n_objective). Defaults to None. + compute_hessian (bool, optional): whether the Hessian is computed. Defaults to True. + jacobian (np.ndarray, optional): Jacobian of the objective function at `X`. Defaults to None. 
- def _compute_gradient(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: + Returns: + Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + if `compute_hessian` = True, it returns (gradient, Hessian) + otherwise, it returns (gradient, ) + """ + if compute_hessian: + out = self._compute_hessian(X, Y, YdX) + HVdX, HVdX2 = out["HVdX"], out["HVdX2"] + else: + HVdX = self._compute_gradient(X, Y, YdX)["HVdX"] + HVdX = HVdX.reshape(len(X), -1) + return (HVdX, HVdX2) if compute_hessian else HVdX + + def _compute_gradient( + self, + X: np.ndarray, + Y: np.ndarray = None, + YdX: np.ndarray = None, + ) -> Dict[str, np.ndarray]: """compute the hypervolume gradient using analytical expressions Parameters @@ -177,17 +190,19 @@ def _compute_gradient(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np } """ X = self._check_X(X) - Y, YdX, YdX2 = self._compute_objective_derivatives(X, Y) + Y, YdX = self._compute_objective_derivatives(X, Y, YdX) self.objective_points = Y - HVdY = np.zeros((self.N, self.n_objective)) + HVdY = np.zeros((self.N, self.n_obj)) HVdY[self._nondominated_indices] = self.hypervolume_dY( self.objective_points[self._nondominated_indices], self.ref ) HVdY = HVdY.reshape(1, -1)[0] HVdX = HVdY @ YdX - return dict(HVdX=HVdX, HVdY=HVdY, YdX=YdX, YdX2=YdX2) + return dict(HVdX=HVdX, HVdY=HVdY, YdX=YdX) - def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: + def _compute_hessian( + self, X: np.ndarray, Y: np.ndarray = None, YdX: np.ndarray = None + ) -> Dict[str, np.ndarray]: """compute the hypervolume gradient and Hessian matrix using analytical expressions Parameters @@ -211,15 +226,17 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: } """ X = self._check_X(X) - res = self._compute_gradient(X, Y) - HVdY, HVdX, YdX, YdX2 = res["HVdY"], res["HVdX"], res["YdX"], res["YdX2"] - HVdY2 = np.zeros((self.N * self.n_objective, self.N * self.n_objective)) + # TODO: fix passing the input arugment `YdX` + res = self._compute_gradient(X, Y, YdX) + Y, YdX, YdX2 = self._compute_objective_derivatives(X, Y, YdX, compute_hessian=True) + HVdY, HVdX = res["HVdY"], res["HVdX"] + HVdY2 = np.zeros((self.N * self.n_obj, self.N * self.n_obj)) for i in range(self.N): if i in self._dominated_indices: # if the point is dominated continue - for k in range(self.n_objective): + for k in range(self.n_obj): # project along `axis`; `*_` indicates variables in the projected subspace - y_, pareto_front_, ref_, proj_idx = self.project(axis=k, i=i) + y_, pareto_front_, ref_, proj_idx = self._project(axis=k, i=i) # partial derivatives ∂(∂HV/∂y_k^i)/∂y^i # of shape (1, dim), where the k-th element is zero Y_ = np.vstack([y_, pareto_front_]) @@ -232,8 +249,8 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: idx = np.where(pareto_indices == 0)[0] out = self.hypervolume_dY(Y_[pareto_indices], ref_)[idx] - rows = slice(i * self.n_objective, (i + 1) * self.n_objective) - HVdY2[rows, i * self.n_objective + k] = np.insert(-1.0 * out, k, 0) + rows = slice(i * self.n_obj, (i + 1) * self.n_obj) + HVdY2[rows, i * self.n_obj + k] = np.insert(-1.0 * out, k, 0) # partial derivatives ∂(∂HV/∂y_k^i)/∂y^{-i} # of shape (len(proj_idx), dim), where the k-th element is zero # ∂HV/∂y_k^i is the hypervolume improvement of `x_` w.r.t. 
`pareto_front_` @@ -244,8 +261,8 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: # hypervolume improvement of points in `pareto_front_` decreases ∂HV/∂y_k^i out = np.insert(out, k, 0, axis=1) for s, j in enumerate(proj_idx): - rows = slice(j * self.n_objective, (j + 1) * self.n_objective) - HVdY2[rows, i * self.n_objective + k] = out[s] + rows = slice(j * self.n_obj, (j + 1) * self.n_obj) + HVdY2[rows, i * self.n_obj + k] = out[s] # TODO: use sparse matrix multiplication here HVdX2 = YdX.T @ HVdY2 @ YdX + np.einsum("...i,i...", HVdY, YdX2) @@ -258,6 +275,29 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: HVdY2=HVdY2, ) + def _project( + self, axis: int, i: int, pareto_front: np.ndarray = None, ref: np.ndarray = None + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """projecting the Pareto front along `axis` with respect to the i-th point""" + pareto_front = self.objective_points if pareto_front is None else pareto_front + ref = self.ref if ref is None else ref + y = pareto_front[i, :] + # projection: drop the `axis`-th dimension + y_ = np.delete(y, obj=axis) + ref_ = np.delete(ref, obj=axis) + idx = np.nonzero(pareto_front[:, axis] < y[axis])[0] + pareto_front_ = np.delete(pareto_front[idx, :], obj=axis, axis=1) + if len(pareto_front_) != 0: + if pareto_front_.shape[1] == 1: + pareto_indices = np.array([np.argmin(pareto_front_.ravel())]) + else: + pareto_indices = non_domin_sort(pareto_front_, only_front_indices=True)[0] + # NOTE: implement a fast algorithm to get the non dominated subset + # pareto_indices = get_non_dominated(pareto_front_, return_index=True) + pareto_front_ = pareto_front_[pareto_indices] + idx = idx[pareto_indices] + return y_, pareto_front_, ref_, idx + def compute_automatic_differentiation(self, X: np.ndarray) -> Dict[str, np.ndarray]: """compute the hypervolume gradient and Hessian matrix using automatic differentiation @@ -282,9 +322,9 @@ def compute_automatic_differentiation(self, X: np.ndarray) -> Dict[str, np.ndarr } """ if self._HV_Jac is None: - self._HV_Jac = jacobian(HVY(self.n_objective, self.ref)) + self._HV_Jac = jacobian(HVY(self.n_obj, self.ref)) if self._HV_Hessian is None: - self._HV_Hessian = hessian(HVY(self.n_objective, self.ref)) + self._HV_Hessian = hessian(HVY(self.n_obj, self.ref)) X = self._check_X(X) Y, YdX, YdX2 = self._compute_objective_derivatives(X) @@ -329,31 +369,37 @@ def hypervolume_dY(self, pareto_front: np.ndarray, ref: np.ndarray) -> np.ndarra # higher dimensional cases: recursive computation for i in range(N): for k in range(dim): - y_, pareto_front_, ref_, _ = self.project(k, i, pareto_front, ref) + y_, pareto_front_, ref_, _ = self._project(k, i, pareto_front, ref) # NOTE: `-1.0` -> since we assume a minimization problem HVdY[i, k] = -1.0 * hypervolume_improvement(y_, pareto_front_, ref_) return HVdY def _compute_objective_derivatives( - self, X: np.ndarray, Y: np.ndarray = None + self, + X: np.ndarray, + Y: np.ndarray = None, + YdX: np.ndarray = None, + compute_hessian: np.ndarray = False, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """compute the objective function value, the Jacobian, and Hessian tensor""" if not isinstance(X, np.ndarray): X = np.asarray(X) - if X.shape[1] != self.n_decision_var: + if X.shape[1] != self.n_var: X = X.T self.N = X.shape[0] # number of points if Y is None: # do not evaluate the function when `Y` is provided Y = np.array([self.func(x) for x in X]) # `(N, n_objective)` # Jacobians - YdX = 
np.array([self.jac(x) for x in X]) # `(N, n_objective, n_decision_var)` + # `(N, n_objective, n_decision_var)` + YdX = np.array([self.jac(x) for x in X]) if YdX is None else YdX.copy() YdX = block_diag(*YdX) # `(N * n_objective, N * n_decision_var)` grouped by points # Hessians - _YdX2 = np.array([self.hessian(x) for x in X]) # `(N, n_objective, n_decision_var, n_decision_var)` - YdX2 = np.zeros( - (self.N * self.n_objective, self.N * self.n_decision_var, self.N * self.n_decision_var) - ) - for i in range(self.N): - idx = slice(i * self.n_decision_var, (i + 1) * self.n_decision_var) - YdX2[i * self.n_objective : (i + 1) * self.n_objective, idx, idx] = _YdX2[i, ...] - return Y, YdX, YdX2 + if compute_hessian: + _YdX2 = np.array( + [self.hessian(x) for x in X] + ) # `(N, n_objective, n_decision_var, n_decision_var)` + YdX2 = np.zeros((self.N * self.n_obj, self.N * self.n_var, self.N * self.n_var)) + for i in range(self.N): + idx = slice(i * self.n_var, (i + 1) * self.n_var) + YdX2[i * self.n_obj : (i + 1) * self.n_obj, idx, idx] = _YdX2[i, ...] + return (Y, YdX, YdX2) if compute_hessian else (Y, YdX) diff --git a/hvd/mmd.py b/hvd/mmd.py index 502d7d9..f8fc588 100644 --- a/hvd/mmd.py +++ b/hvd/mmd.py @@ -253,9 +253,11 @@ def compute_derivatives( if `compute_hessian` = True, it returns (gradient, Hessian) otherwise, it returns (gradient, ) """ - grad = self.compute_gradient(X, Y, jacobian)["MMDdX"] if compute_hessian: - hessian = self.compute_hessian(X, Y)["MMDdX2"] + out = self.compute_hessian(X, Y) + grad, hessian = out["MMDdX"], out["MMDdX2"] + else: + grad = self.compute_gradient(X, Y, jacobian)["MMDdX"] return (grad, hessian) if compute_hessian else grad def compute_gradient( diff --git a/hvd/mmd_newton.py b/hvd/mmd_newton.py index 84ba55c..f816193 100644 --- a/hvd/mmd_newton.py +++ b/hvd/mmd_newton.py @@ -99,7 +99,7 @@ def __init__( self.xu: np.ndarray = xu self.ref: ClusteredReferenceSet = ref # TODO: we should pass ref to the indicator directly self._check_constraints(h, g) - self.state: State = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h=h, h_jac=h_jac, g=g, g_jac=g_jac) # TODO: move indicator out of this class self.indicator = MMDMatching( self.dim_p, self.n_obj, self.ref, func, jac, hessian, theta=1.0 / N, beta=0.2 @@ -235,7 +235,7 @@ def _compute_R( if self._constrained: R = np.zeros((state.N, self.dim)) # the root-finding problem func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - cstr_value, idx, dH = state.cstr_value, state.active_indices, state.dH + cstr_value, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i, idx, :] for i, idx in enumerate(idx)] # only take Jacobian of the active constraints active_indices = np.c_[active_indices, idx] for i, k in enumerate(active_indices): @@ -298,7 +298,7 @@ def _backtracking_line_search( def phi_func(alpha, i): state = deepcopy(self.state) - x = state.X[i] + x = state.X[i].copy() x += alpha * step[i] state.update_one(x, i) self.indicator.re_match = False diff --git a/hvd/newton.py b/hvd/newton.py index b1f91aa..b961985 100644 --- a/hvd/newton.py +++ b/hvd/newton.py @@ -10,7 +10,6 @@ from .base import State from .delta_p import GenerationalDistance, InvertedGenerationalDistance -from .hypervolume import hypervolume from .hypervolume_derivatives import HypervolumeDerivatives from .utils import compute_chim, get_logger, non_domin_sort, precondition_hessian, set_bounds @@ -72,10 +71,20 @@ def __init__( self.xl: 
np.ndarray = xl self.xu: np.ndarray = xu self.ref: np.ndarray = ref - self.h_hessian: Callable = h_hessian - self.g_hessian: Callable = g_hessian self._check_constraints(h, g) - self.state: State = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State( + self.dim_p, + self.n_eq, + self.n_ieq, + func, + jac, + h=h, + h_jac=h_jac, + h_hess=h_hessian, + g=g, + g_jac=g_jac, + g_hess=g_hessian, + ) self.indicator = HypervolumeDerivatives(self.dim_p, self.n_obj, ref, func, jac, hessian) self._initialize(X0) self._set_logging(verbose) @@ -128,15 +137,11 @@ def _set_logging(self, verbose: bool): self.curr_indicator_value: float = None self.history_Y: List[np.ndarray] = [] self.history_X: List[np.ndarray] = [] - self.history_HV: List[float] = [] - # self.history_indicator_value: List[float] = [] + self.history_indicator_value: List[float] = [] self.history_R_norm: List[float] = [] self.history_metrics: Dict[str, List] = defaultdict(list) self.history_medoids: Dict[int, List] = defaultdict(list) self.logger: logging.Logger = get_logger(logger_id=f"{self.__class__.__name__}", console=self.verbose) - self._delta_X: float = np.inf - self._delta_Y: float = np.inf - self._delta_HV: float = np.inf @property def xl(self): @@ -158,32 +163,7 @@ def run(self) -> Tuple[np.ndarray, np.ndarray, Dict]: while not self.terminate(): self.newton_iteration() self.log() - return self._get_primal_dual(self.X)[0], self.Y, self.stop_dict - - def _precondition_hessian(self, H: np.ndarray) -> np.ndarray: - """Precondition the Hessian matrix to make sure it is negative definite - - Args: - H (np.ndarray): the Hessian matrix - - Returns: - np.ndarray: the preconditioned Hessian - """ - # pre-condition the Hessian - beta = 1e-6 - v = np.min(np.diag(-H)) - tau = 0 if v > 0 else -v + beta - I = np.eye(H.shape[0]) - for _ in range(35): - try: - cholesky(-H + tau * I, lower=True) - break - except: - # NOTE: the multiplier is not working for Eq1IDTLZ3.. 
Otherwise, it takes 1.5 - tau = max(1.5 * tau, beta) - else: - self.logger.warn("Pre-conditioning the HV Hessian failed") - return H - tau * I + return self.state.primal, self.state.Y, self.stop_dict def _compute_R( self, state: State, grad: np.ndarray = None @@ -195,38 +175,33 @@ def _compute_R( the Jacobian of the equality constraints, and the indices of the active primal-dual variables """ - primal_vars, dual_vars = state.primal, state.dual if grad is None: - grad = self.indicator._compute_gradient(X=primal_vars, Y=state.Y, jacobian=state.J) + grad = self.indicator.compute_derivatives(state.primal, state.Y, False, state.J) # the unconstrained case R, dH, active_indices = grad, None, np.array([[True] * state.n_var] * state.N) if self._constrained: R = np.zeros((state.N, self.dim)) # the root-finding problem func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - cstr_value, idx, dH = state.cstr_value, state.active_indices, state.dH + cstr_value, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i, idx, :] for i, idx in enumerate(idx)] # only take Jacobian of the active constraints active_indices = np.c_[active_indices, idx] for i, k in enumerate(active_indices): - R[i, k] = np.r_[func(grad[i], dual_vars[i, idx[i]], dH[i]), cstr_value[i, idx[i]]] + R[i, k] = np.r_[func(grad[i], state.dual[i, idx[i]], dH[i]), cstr_value[i, idx[i]]] return R, dH, active_indices - def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]: - primal_vars, dual_vars = self.state.primal, self.state.dual - grad, Hessian = self.indicator.compute(X=primal_vars, Y=dual_vars, jacobian=self.state.J) - if self.preconditioning: # sometimes the Hessian is not NSD + def _compute_netwon_step(self, state: State) -> Tuple[np.ndarray, np.ndarray]: + grad, Hessian = self.indicator.compute_derivatives(X=state.primal, Y=state.Y, YdX=state.J) + R, H, active_indices = self._compute_R(state, grad=grad) + # sometimes the Hessian is not NSD + if self.preconditioning: Hessian = self._precondition_hessian(Hessian) - R, dH, active_indices = self._compute_R(self.state, grad=grad) - DR = Hessian - idx = active_indices[:, self.dim_p :] + DR, idx = Hessian, active_indices[:, self.dim_p :] if self._constrained: - dH = block_diag(*dH) # (N * p, N * dim), `p` is the number of active constraints - ddH = block_diag(*[self.state.ddH[i, k, ...] 
* dual_vars[i, k] for i, k in enumerate(idx)]) - # ddH = block_diag( - # *[np.einsum("ijk,i->jk", self._h_hessian(x), dual_vars[i]) for i, x in enumerate(primal_vars)] - # *[(self.g_hessian(x) * dual_vars[i, idx[i]])[0] for i, x in enumerate(primal_vars)] - # ) # (N * dim, mu * dim) - Z = np.zeros((len(dH), len(dH))) - DR = np.r_[np.c_[DR + ddH, dH.T], np.c_[dH, Z]] + H = block_diag(*H) # (N * p, N * dim), `p` is the number of active constraints + B = state.cstr_hess + M = block_diag(*[np.einsum("i...,i", B[i, k], state.dual[i, k]) for i, k in enumerate(idx)]) + Z = np.zeros((len(H), len(H))) + DR = np.r_[np.c_[DR + M, H.T], np.c_[H, Z]] # the vector-format of R R_ = np.r_[R[:, : self.dim_p].reshape(-1, 1), R[:, self.dim_p :][idx].reshape(-1, 1)] with warnings.catch_warnings(): @@ -239,92 +214,171 @@ def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]: newton_step = Nd_vector_to_matrix(newton_step_.ravel(), self.N, self.dim, self.dim_p, active_indices) return newton_step, R + def _compute_indicator_value(self): + self.curr_indicator_value = self.indicator.compute(Y=self.state.Y) + def newton_iteration(self): - self._check_XY() - self.step = np.zeros((self.N, self.dim)) - self.step_size = np.ones(self.N) - self.G = np.zeros((self.N, self.dim)) + # check for anomalies in `X` and `Y` + self._check_population() + # first compute the current indicator value + self._compute_indicator_value() + self._nondominated_idx = non_domin_sort(self.state.Y, only_front_indices=True)[0] # partition the approximation set to by feasibility - self._nondominated_idx = non_domin_sort(self.Y, only_front_indices=True)[0] - if self.h is None: - feasible_mask = np.array([True] * self.N) - else: - eq_cstr = np.array([self.h(_) for _ in self._get_primal_dual(self.X)[0]]).reshape(self.N, -1) - feasible_mask = np.all(np.isclose(eq_cstr, 0, atol=1e-4, rtol=0), axis=1) - + feasible_mask = self.state.is_feasible() feasible_idx = np.nonzero(feasible_mask)[0] - dominated_idx = list((set(range(self.N)) - set(self._nondominated_idx) - set(feasible_idx))) - if np.any(feasible_mask): + infeasible_idx = np.nonzero(~feasible_mask)[0] + partitions = {0: np.array(range(self.N))} + if len(feasible_idx) > 0: # non-dominatd sorting of the feasible points - partitions = non_domin_sort(self.Y[feasible_mask], only_front_indices=True) + partitions = non_domin_sort(self.state.Y[feasible_idx], only_front_indices=True) partitions = {k: feasible_idx[v] for k, v in partitions.items()} - partitions.update({0: np.sort(np.r_[partitions[0], np.nonzero(~feasible_mask)[0]])}) - else: - partitions = {0: np.array(range(self.N))} + # add all infeasible points to the first dominance layer + partitions.update({0: np.sort(np.r_[partitions[0], infeasible_idx])}) # compute the Newton direction for each partition - for _, idx in partitions.items(): - out = self._compute_netwon_step(X=self.X[idx], Y=self.Y[idx]) - self.step[idx, :] = out["step"] - self.G[idx, :] = out["G"] + self.step = np.zeros((self.N, self.dim)) + self.step_size = np.ones(self.N) + self.R = np.zeros((self.N, self.dim)) + for i, idx in partitions.items(): + # compute Newton step + newton_step, R = self._compute_netwon_step(self.state[idx]) + self.step[idx, :] = newton_step + self.R[idx, :] = R # backtracking line search with Armijo's condition for each layer - if _ == 0 and len(dominated_idx) > 0: # for the first layer - idx_ = list(set(idx) - set(dominated_idx)) - for k in dominated_idx: # for dominated and infeasible points - self.step_size[k] = 
self._line_search_dominated(self.X[[k]], self.step[[k]]) - self.step_size[idx_] = self._line_search(self.X[idx_], self.step[idx_], G=self.G[idx_]) - else: # for all other layers - self.step_size[idx] = self._line_search(self.X[idx], self.step[idx], G=self.G[idx]) - - self.X += self.step_size.reshape(-1, 1) * self.step - # evaluation - self.Y = np.array([self.func(x) for x in self._get_primal_dual(self.X)[0]]) + idx_ = list(set(idx) - set(infeasible_idx)) if i == 0 else idx + self.step_size[idx_] = self._line_search(self.state[idx_], self.step[idx_], R=self.R[idx_]) + # TODO: unify `_line_search` and `_line_search_dominated` + # for k in infeasible_idx: # for dominated and infeasible points + # self.step_size[k] = self._line_search(self.state[infeasible_idx], self.step[infeasible_idx], R=self.R[infeasible_idx]) + # TODO: this part should be tested + self.step_size[infeasible_idx] = self._line_search( + self.state[infeasible_idx], self.step[infeasible_idx], R=self.R[infeasible_idx] + ) + # Newton iteration and evaluation + self.state.update(self.state.X + self.step_size.reshape(-1, 1) * self.step) + + def log(self): self.iter_count += 1 + self.history_X += [self.state.primal.copy()] + self.history_Y += [self.state.Y.copy()] + self.history_indicator_value += [self.curr_indicator_value] + self.history_R_norm += [np.median(np.linalg.norm(self.R, axis=1))] + for name, func in self.metrics.items(): # compute the performance metrics + self.history_metrics[name].append(func.compute(Y=self.state.Y)) + if self.verbose: + self.logger.info(f"iteration {self.iter_count} ---") + self.logger.info(f"{self.indicator.__class__.__name__}: {self.curr_indicator_value}") + self.logger.info(f"step size: {self.step_size.ravel()}") + self.logger.info(f"R norm: {self.history_R_norm[-1]}") + self.logger.info(f"#non-dominated: {len(self._nondominated_idx)}") + + def terminate(self) -> bool: + if self.iter_count >= self.max_iters: + self.stop_dict["iter_count"] = self.iter_count + + return bool(self.stop_dict) + + def _precondition_hessian(self, H: np.ndarray) -> np.ndarray: + """Precondition the Hessian matrix to make sure it is negative definite + + Args: + H (np.ndarray): the Hessian matrix + + Returns: + np.ndarray: the preconditioned Hessian + """ + # TODO: remove this function + # pre-condition the Hessian + beta = 1e-6 + v = np.min(np.diag(-H)) + tau = 0 if v > 0 else -v + beta + I = np.eye(H.shape[0]) + for _ in range(35): + try: + cholesky(-H + tau * I, lower=True) + break + except: + # NOTE: the multiplier is not working for Eq1IDTLZ3.. Otherwise, it takes 1.5 + tau = max(1.5 * tau, beta) + else: + self.logger.warn("Pre-conditioning the HV Hessian failed") + return H - tau * I - def _line_search(self, X: np.ndarray, step: np.ndarray, G: np.ndarray) -> float: + def _line_search( + self, state: State, step: np.ndarray, R: np.ndarray, max_step_size: float = None + ) -> float: """backtracking line search with Armijo's condition""" - # TODO: ad-hoc! 
to solve this in the further using a high precision numerical library - # NOTE: when the step length is close to numpy's numerical resolution, it makes no sense to perform - # the step-size control - if np.any(np.isclose(np.median(step), np.finfo(np.double).resolution)): + c1 = 1e-4 + if np.all(np.isclose(step, 0)): return 1 - c = 1e-5 - N = len(X) - primal_vars = self._get_primal_dual(X)[0] - normal_vectors = np.c_[np.eye(self.dim_p * N), -1 * np.eye(self.dim_p * N)] - # calculate the maximal step-size - dist = np.r_[ - np.abs(primal_vars.ravel() - np.tile(self.xl, N)), - np.abs(np.tile(self.xu, N) - primal_vars.ravel()), - ] - v = step[:, : self.dim_p].ravel() @ normal_vectors - alpha = min(1, 0.25 * np.min(dist[v < 0] / np.abs(v[v < 0]))) + def phi_func(alpha): + state_ = deepcopy(state) + state_.update(state.X + alpha * step) + R = self._compute_R(state_)[0] + return np.linalg.norm(R) + step_size = 1 if max_step_size is None else max_step_size + phi = [np.linalg.norm(R)] + s = [0, step_size] for _ in range(6): - X_ = X + alpha * step - if self.h is None: - HV = self.indicator.HV(X) - HV_ = self.indicator.HV(X_) - inc = np.inner(G.ravel(), step.ravel()) - cond = HV_ - HV >= c * alpha * inc - else: - G_ = self._compute_R(X_) - cond = np.linalg.norm(G_) <= (1 - c * alpha) * np.linalg.norm(G) - if cond: + phi.append(phi_func(s[-1])) + # Armijo–Goldstein condition + # when R norm is close to machine precision, it makes no sense to perform the line search + success = phi[-1] <= (1 - c1 * s[-1]) * phi[0] or np.isclose(phi[0], np.finfo(float).eps) + if success: break else: if 11 < 2: - phi0 = HV if self.h is None else np.sum(G**2) / 2 - phi1 = HV_ if self.h is None else np.sum(G_**2) / 2 - phi0prime = inc if self.h is None else -np.sum(G**2) - alpha = -phi0prime * alpha**2 / (phi1 - phi0 - phi0prime * alpha) / 2 - # alpha *= tau - if 1 < 2: - alpha *= 0.5 + # cubic interpolation to compute the next step length + d1 = -phi[-2] - phi[-1] - 3 * (phi[-2] - phi[-1]) / (s[-2] - s[-1]) + d2 = np.sign(s[-1] - s[-2]) * np.sqrt(d1**2 - phi[-2] * phi[-1]) + s_ = s[-1] - (s[-1] - s[-2]) * (-phi[-1] + d2 - d1) / (-phi[-1] + phi[-2] + 2 * d2) + s_ = s[-1] * 0.5 if np.isnan(s_) else np.clip(s_, 0.4 * s[-1], 0.6 * s[-1]) + s.append(s_) + else: + s.append(s[-1] * 0.5) else: - self.logger.warn("Armijo's backtracking line search failed") - return alpha + self.logger.warn("backtracking line search failed") + step_size = s[-1] + return step_size + + # c = 1e-5 + # N = len(X) + # primal_vars = self._get_primal_dual(X)[0] + # normal_vectors = np.c_[np.eye(self.dim_p * N), -1 * np.eye(self.dim_p * N)] + # # calculate the maximal step-size + # dist = np.r_[ + # np.abs(primal_vars.ravel() - np.tile(self.xl, N)), + # np.abs(np.tile(self.xu, N) - primal_vars.ravel()), + # ] + # v = step[:, : self.dim_p].ravel() @ normal_vectors + # alpha = min(1, 0.25 * np.min(dist[v < 0] / np.abs(v[v < 0]))) + + # for _ in range(6): + # X_ = X + alpha * step + # if self.h is None: + # HV = self.indicator.compute(X) + # HV_ = self.indicator.compute(X_) + # inc = np.inner(R.ravel(), step.ravel()) + # cond = HV_ - HV >= c * alpha * inc + # else: + # G_ = self._compute_R(X_) + # cond = np.linalg.norm(G_) <= (1 - c * alpha) * np.linalg.norm(R) + # if cond: + # break + # else: + # if 11 < 2: + # phi0 = HV if self.h is None else np.sum(R**2) / 2 + # phi1 = HV_ if self.h is None else np.sum(G_**2) / 2 + # phi0prime = inc if self.h is None else -np.sum(R**2) + # alpha = -phi0prime * alpha**2 / (phi1 - phi0 - phi0prime * alpha) / 2 + # # alpha *= 
tau + # if 1 < 2: + # alpha *= 0.5 + # else: + # self.logger.warn("Armijo's backtracking line search failed") + # return alpha def _line_search_dominated(self, X: np.ndarray, step: np.ndarray) -> float: """backtracking line search with Armijo's condition""" @@ -363,9 +417,9 @@ def _line_search_dominated(self, X: np.ndarray, step: np.ndarray) -> float: self.logger.warn("Armijo's backtracking line search failed") return alpha - def _check_XY(self): + def _check_population(self): # get unique points: if some points converge to the same location - primal_vars = self.X[:, : self.dim_p] + primal_vars = self.state.primal D = cdist(primal_vars, primal_vars) drop_idx_X = set([]) for i in range(self.N): @@ -373,54 +427,17 @@ def _check_XY(self): drop_idx_X |= set(np.nonzero(np.isclose(D[i, :], 0, rtol=self.eps))[0]) - set([i]) # get rid of weakly-dominated points - # TODO: Ad-hoc solution! check if this is still needed - # since the hypervolume indicator module is upgraded drop_idx_Y = set([]) # TODO: Ad-hoc solution! check if this is still needed - if self.problem_name is not None and self.problem_name not in ("Eq1DTLZ4", "Eq1IDTLZ4"): - for i in range(self.N): - if i not in drop_idx_Y: - drop_idx_Y |= set(np.nonzero(np.any(np.isclose(self.Y[i, :], self.Y), axis=1))[0]) - set( - [i] - ) + # if self.problem_name is not None and self.problem_name not in ("Eq1DTLZ4", "Eq1IDTLZ4"): + # for i in range(self.N): + # if i not in drop_idx_Y: + # drop_idx_Y |= set(np.nonzero(np.any(np.isclose(self.Y[i, :], self.Y), axis=1))[0]) - set( + # [i] + # ) idx = list(set(range(self.N)) - (drop_idx_X | drop_idx_Y)) - self.N = len(idx) - self.X = self.X[idx, :] - self.Y = self.Y[idx, :] - - def log(self): - HV = hypervolume(self.Y, self.ref) - self.history_Y += [self.Y.copy()] - self.history_X += [self._get_primal_dual(self.X.copy())[0]] - self.history_HV += [HV] - - if self.verbose: - self.logger.info(f"iteration {self.iter_count} ---") - self.logger.info(f"HV: {HV}") - # self.logger.info(f"step size: {self.step_size}") - self.logger.info(f"#non-dominated: {len(self._nondominated_idx)}") - - if self.iter_count >= 1: - try: - self._delta_X = np.mean( - np.sqrt(np.sum((self.history_X[-1] - self.history_X[-2]) ** 2, axis=1)) - ) - self._delta_Y = np.mean( - np.sqrt(np.sum((self.history_Y[-1] - self.history_Y[-2]) ** 2, axis=1)) - ) - self._delta_HV = np.abs(self.history_HV[-1] - self.history_HV[-2]) - except: - pass - - if self.h is not None: - self.hist_R_norm += [np.median(np.linalg.norm(self.G[self._nondominated_idx], axis=1))] - self.logger.info(f"G norm: {self.hist_R_norm[-1]}") - - def terminate(self) -> bool: - if self.iter_count >= self.max_iters: - self.stop_dict["iter_count"] = self.iter_count - - return bool(self.stop_dict) + self.state = self.state[idx] + self.N = self.state.N class DpN: @@ -493,7 +510,7 @@ def __init__( self.xl = xl self.xu = xu self._check_constraints(h, g) - self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h=h, h_jac=h_jac, g=g, g_jac=g_jac) self._initialize(x0) self._set_indicator(ref, func, jac, hessian) self._set_logging(verbose) @@ -676,7 +693,7 @@ def _compute_R( dH, idx = None, None if self._constrained: func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - v, idx, dH = state.cstr_value, state.active_indices, state.dH + v, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i][idx, :] for i, idx in enumerate(idx)] R = [np.r_[func(grad[i], dual_vars[i, 
k], dH[i]), v[i, k]] for i, k in enumerate(idx)] return R, dH, idx @@ -782,7 +799,7 @@ def _backtracking_line_search( def phi_func(alpha, i): state = deepcopy(self.state) - x = state.X[i] + x = state.X[i].copy() x += alpha * step[i] state.update_one(x, i) self.active_indicator.re_match = False diff --git a/tests/test_2D.py b/tests/test_2D.py index e776142..e713ac8 100644 --- a/tests/test_2D.py +++ b/tests/test_2D.py @@ -11,7 +11,7 @@ def test_2D_case1(): ref = np.array([11, 10]) hvh = HypervolumeDerivatives(2, 2, ref, minimization=True) - out = hvh.compute(X=np.array([(10, 1), (9.5, 3), (8, 6.5), (4, 8), (1, 9)])) + out = hvh._compute_hessian(X=np.array([(10, 1), (9.5, 3), (8, 6.5), (4, 8), (1, 9)])) assert np.all(out["HVdY"] == np.array([-2, -1, -3.5, -0.5, -1.5, -1.5, -1, -4, -1, -3])) assert np.all( out["HVdY2"] @@ -50,8 +50,8 @@ def MOP1_Hessian(x): def test_2D_case2(): ref = np.array([20, 20]) hvh = HypervolumeDerivatives( - n_decision_var=2, - n_objective=2, + n_var=2, + n_obj=2, ref=ref, func=MOP1, jac=MOP1_Jacobian, @@ -60,7 +60,7 @@ def test_2D_case2(): ) for _ in range(10): X = np.random.rand(3, 2) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(out["HVdX"], AD["HVdX"])) assert np.all(np.isclose(out["HVdX2"], AD["HVdX2"])) diff --git a/tests/test_3D.py b/tests/test_3D.py index 7d01c7d..11ad6df 100644 --- a/tests/test_3D.py +++ b/tests/test_3D.py @@ -9,7 +9,7 @@ def test_3D_case1(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[5, 1, 7], [2, 3, 10]])) + out = hvh._compute_hessian(X=np.array([[5, 1, 7], [2, 3, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -28,7 +28,7 @@ def test_3D_case1(): def test_3D_case2(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[-1, 5, 7], [2, 3, 10]])) + out = hvh._compute_hessian(X=np.array([[-1, 5, 7], [2, 3, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -47,7 +47,7 @@ def test_3D_case2(): def test_3D_case3(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[5, 3, 7], [2, 1, 10]])) + out = hvh._compute_hessian(X=np.array([[5, 3, 7], [2, 1, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -66,7 +66,7 @@ def test_3D_case3(): def test_3D_case4(): ref = np.array([10, 13, 23]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([(8, 7, 10), (4, 11, 17), (2, 9, 21)])) + out = hvh._compute_hessian(X=np.array([(8, 7, 10), (4, 11, 17), (2, 9, 21)])) assert np.all(out["HVdY"] == np.array([-62, -26, -12, -8, -16, -8, -8, -12, -16])) assert np.all( out["HVdY2"] @@ -89,7 +89,7 @@ def test_3D_case4(): def test_3D_case5(): ref = np.array([17, 35, 7]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute( + out = hvh._compute_hessian( X=np.array([(16, 23, 1), (14, 32, 2), (12, 27, 3), (10, 21, 4), (8, 33, 5), (6.5, 31, 6)]) ) assert np.all( @@ -126,7 +126,7 @@ def test_3D_case5(): def test_3D_case6(): ref = np.array([3, 3, 3]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([(1, 2, -1)])) + out = hvh._compute_hessian(X=np.array([(1, 2, -1)])) assert np.all( out["HVdY2"] == np.array( @@ -142,7 +142,7 @@ def test_3D_case6(): def test_with_dominated_points(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = 
hvh.compute(X=np.array([[-1, -2, 7], [2, 1, 10]])) + out = hvh._compute_hessian(X=np.array([[-1, -2, 7], [2, 1, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -191,9 +191,7 @@ def MOP1_Hessian(x): return np.array([2 * np.eye(3), 2 * np.eye(3), 2 * np.eye(3)]) -hvh = HypervolumeDerivatives( - n_decision_var=3, n_objective=3, ref=ref, func=MOP1, jac=MOP1_Jacobian, hessian=MOP1_Hessian -) +hvh = HypervolumeDerivatives(n_var=3, n_obj=3, ref=ref, func=MOP1, jac=MOP1_Jacobian, hessian=MOP1_Hessian) def test_against_autograd(): @@ -201,7 +199,7 @@ def test_against_autograd(): w = np.random.rand(20, 3) w /= np.sum(w, axis=1).reshape(-1, 1) X = w @ np.vstack([c1, c2, c3]) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(AD["HVdY"], out["HVdY"])) diff --git a/tests/test_ND.py b/tests/test_ND.py index 47e7808..da39281 100644 --- a/tests/test_ND.py +++ b/tests/test_ND.py @@ -44,14 +44,14 @@ def test_4D(n_objective): ref = Y.max(axis=0) * 1.2 hvh = HypervolumeDerivatives( - n_decision_var=dim, - n_objective=n_objective, + n_var=dim, + n_obj=n_objective, ref=ref, func=func, jac=jac, hessian=hessian, ) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(AD["HVdY"], out["HVdY"], atol=1e-5, rtol=1e-8))
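
Usage sketch (not part of the patch): the `HypervolumeDerivatives` constructor now takes `n_var`/`n_obj`, the old `HV` method becomes `compute`, the old `compute` (gradient + Hessian dict) becomes `_compute_hessian`, and `compute_derivatives` is the public entry point returning arrays. The toy objective, Jacobian, and Hessian below are assumptions made only for illustration; they are not defined anywhere in this patch.

    import numpy as np
    from hvd.hypervolume_derivatives import HypervolumeDerivatives

    # toy bi-objective problem (illustrative only)
    func = lambda x: np.array([np.sum((x - 1) ** 2), np.sum((x + 1) ** 2)])
    jac = lambda x: np.array([2 * (x - 1), 2 * (x + 1)])
    hessian = lambda x: np.array([2 * np.eye(len(x)), 2 * np.eye(len(x))])

    ref = np.array([20.0, 20.0])
    hvh = HypervolumeDerivatives(
        n_var=2, n_obj=2, ref=ref, func=func, jac=jac, hessian=hessian, minimization=True
    )
    X = np.random.rand(5, 2)
    hv_value = hvh.compute(X=X)                                # hypervolume value (formerly `HV`)
    HVdX = hvh.compute_derivatives(X, compute_hessian=False)   # gradient only
    HVdX, HVdX2 = hvh.compute_derivatives(X)                   # gradient and Hessian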
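
Similarly, a sketch of the reworked constraint handling in `hvd.base.State`: `evaluate_cstr` returns the stacked constraint values together with an active-constraint mask, `update` caches constraint Jacobians and Hessians (`cstr_grad`, `cstr_hess`), and `__getitem__` slices the state. The single equality constraint used here is a hypothetical example for illustration only.

    import numpy as np
    from hvd.base import State

    func = lambda x: np.array([np.sum((x - 1) ** 2), np.sum((x + 1) ** 2)])  # illustrative objectives
    jac = lambda x: np.array([2 * (x - 1), 2 * (x + 1)])
    h = lambda x: np.sum(x) - 1.0                  # hypothetical equality constraint h(x) = x1 + x2 - 1
    h_jac = lambda x: np.ones_like(x)
    h_hess = lambda x: np.zeros((len(x), len(x)))

    state = State(n_var=2, n_eq=1, n_ieq=0, func=func, jac=jac, h=h, h_jac=h_jac, h_hess=h_hess)
    X = np.c_[np.random.rand(5, 2), np.zeros(5)]   # primal variables plus one dual column per point
    state.update(X)                                # caches Y, J, cstr_value, active_indices, cstr_grad, cstr_hess
    mask = state.is_feasible()                     # per-point feasibility of the equality constraints
    subset = state[np.array([0, 1])]               # sliced copy via the new __getitem__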