From 25aa1d424ee39b127b13ff5c28c60006ee560e91 Mon Sep 17 00:00:00 2001 From: wangronin Date: Fri, 5 Jul 2024 15:40:05 +0200 Subject: [PATCH] update --- examples/HVN/convex_example.py | 7 +- examples/HV_Hessian/2D_example_Hessian.py | 6 +- examples/HV_Hessian/3D_example_Hessian.py | 6 +- examples/HV_Hessian/4D_example_Hessian.py | 2 +- examples/MMD/example_ZDT.py | 1 + hvd/base.py | 134 +++++--- hvd/hypervolume_derivatives.py | 174 +++++++---- hvd/mmd.py | 6 +- hvd/mmd_newton.py | 6 +- hvd/newton.py | 359 +++++++++++----------- tests/test_2D.py | 8 +- tests/test_3D.py | 20 +- tests/test_ND.py | 6 +- 13 files changed, 422 insertions(+), 313 deletions(-) diff --git a/examples/HVN/convex_example.py b/examples/HVN/convex_example.py index 17f0d29..7ddf31d 100644 --- a/examples/HVN/convex_example.py +++ b/examples/HVN/convex_example.py @@ -52,7 +52,7 @@ def g_Hessian(x): ref = np.array([1, 1]) max_iters = 10 -N = 10 +N = 15 x1 = np.random.rand(N) x2 = x1**2 - 2 * x1 + 1 x0 = np.c_[x1, x2] @@ -75,6 +75,7 @@ def g_Hessian(x): minimization=True, max_iters=max_iters, verbose=True, + preconditioning=True, ) X, Y, stop = opt.run() @@ -112,8 +113,8 @@ def g_Hessian(x): ax0.legend([r"$Y_{\text{final}}$", r"$Y_0$", "Pareto front"]) ax12 = ax1.twinx() -ax1.plot(range(1, len(opt.history_HV) + 1), opt.history_HV, "b-") -ax12.semilogy(range(1, len(opt.history_HV) + 1), opt.hist_R_norm, "g--") +ax1.plot(range(1, len(opt.history_indicator_value) + 1), opt.history_indicator_value, "b-") +ax12.semilogy(range(1, len(opt.history_R_norm) + 1), opt.history_R_norm, "g--") ax1.set_ylabel("HV", color="b") ax12.set_ylabel(r"$||G(\mathbf{X})||$", color="g") ax1.set_title("Performance") diff --git a/examples/HV_Hessian/2D_example_Hessian.py b/examples/HV_Hessian/2D_example_Hessian.py index 18710dd..f518805 100644 --- a/examples/HV_Hessian/2D_example_Hessian.py +++ b/examples/HV_Hessian/2D_example_Hessian.py @@ -21,8 +21,8 @@ def MOP1_Hessian(x): ref = np.array([20, 20]) hvh = HypervolumeDerivatives( - n_decision_var=2, - n_objective=2, + n_var=2, + n_obj=2, ref=ref, func=MOP1, jac=MOP1_Jacobian, @@ -33,7 +33,7 @@ def MOP1_Hessian(x): X = np.random.rand(10, 2) Y = np.array([MOP1(_) for _ in X]) idx = get_non_dominated(-1 * Y, return_index=True) - out = hvh.compute(X[idx, :]) + out = hvh._compute_hessian(X[idx, :]) HdY2, HdX2 = out["HVdY2"], out["HVdX2"] HdX = out["HVdX"] w, v = np.linalg.eigh(HdX2) diff --git a/examples/HV_Hessian/3D_example_Hessian.py b/examples/HV_Hessian/3D_example_Hessian.py index cf18f4d..cbd4248 100644 --- a/examples/HV_Hessian/3D_example_Hessian.py +++ b/examples/HV_Hessian/3D_example_Hessian.py @@ -16,14 +16,14 @@ ref = np.array([2, 2, 2]) hvh = HypervolumeDerivatives( - n_decision_var=dim, - n_objective=3, + n_var=dim, + n_obj=3, ref=ref, func=f.objective, jac=f.objective_jacobian, hessian=f.objective_hessian, ) - out = hvh.compute(X) + out = hvh._compute_hessian(X) # automatic differentiation is very slow; # used for verifying the result of the analytical computation AD = hvh.compute_automatic_differentiation(X) diff --git a/examples/HV_Hessian/4D_example_Hessian.py b/examples/HV_Hessian/4D_example_Hessian.py index 1317c67..f286c51 100644 --- a/examples/HV_Hessian/4D_example_Hessian.py +++ b/examples/HV_Hessian/4D_example_Hessian.py @@ -10,7 +10,7 @@ ref = np.array([17, 35, 7, 10]) hvh = HypervolumeDerivatives(4, 4, ref, minimization=True) X = np.array([(16, 23, 1, 8), (14, 32, 2, 5), (12, 27, 3, 1), (10, 21, 4, 9), (8, 33, 5, 3)]) -out = hvh.compute(X) +out = hvh._compute_hessian(X) assert 
np.all( out["HVdY2"] diff --git a/examples/MMD/example_ZDT.py b/examples/MMD/example_ZDT.py index 593df89..4448316 100644 --- a/examples/MMD/example_ZDT.py +++ b/examples/MMD/example_ZDT.py @@ -74,6 +74,7 @@ max_iters=max_iters, verbose=True, metrics=metrics, + preconditioning=True, ) X, Y, _ = opt.run() diff --git a/hvd/base.py b/hvd/base.py index d806919..370626e 100644 --- a/hvd/base.py +++ b/hvd/base.py @@ -1,6 +1,7 @@ -import numpy as np +from copy import deepcopy +from typing import Callable, Self, Tuple -from .utils import merge_lists +import numpy as np class State: @@ -9,24 +10,31 @@ def __init__( n_var: int, n_eq: int, n_ieq: int, - func: callable, - jac: callable, - h: callable = None, - h_jac: callable = None, - g: callable = None, - g_jac: callable = None, + func: Callable, + jac: Callable, + h: Callable = None, + h_jac: Callable = None, + h_hess: Callable = None, + g: Callable = None, + g_jac: Callable = None, + g_hess: Callable = None, ) -> None: self.n_var: int = n_var # the number of the primal variables self.n_eq: int = n_eq self.n_ieq: int = n_ieq - self.func: callable = func - self._jac: callable = jac - self.h: callable = h - self.h_jac: callable = h_jac - self.g: callable = g - self.g_jac: callable = g_jac + self.n_cstr: int = self.n_eq + self.n_ieq + self.func: Callable = func + self._jac: Callable = jac + self.h: Callable = h + self.h_jac: Callable = h_jac + self.h_hess: Callable = h_hess + self.g: Callable = g + self.g_jac: Callable = g_jac + self.g_hess: Callable = g_hess self._constrained: bool = self.g is not None or self.h is not None self.n_jac_evals: int = 0 + self.n_cstr_jac_evals: int = 0 + self.n_cstr_hess_evals: int = 0 @property def N(self): @@ -37,32 +45,67 @@ def jac(self, x: np.ndarray) -> np.ndarray: self.n_jac_evals += 1 return self._jac(x) + def evaluate_cstr(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + H = self.h(x) if self.h is not None else [] + G = self.g(x) if self.g is not None else [] + if isinstance(H, (int, float)): + H = np.array([H]) + if isinstance(G, (int, float)): + G = np.array([G]) + active_indices = [True] * self.n_eq + if self.g is not None: + active_indices += (G >= -1e-4).tolist() + return np.r_[H, G], np.array(active_indices) + + def evaluate_cstr_jac(self, x: np.ndarray) -> np.ndarray: + if self.g_jac is not None or self.h_jac is not None: + self.n_cstr_jac_evals += 1 + # TODO: only evaluate active inequality constraints + dH = self.h_jac(x).tolist() if self.h_jac is not None else [] + dG = self.g_jac(x).tolist() if self.g_jac is not None else [] + return np.array(dH + dG).reshape(self.n_cstr, self.n_var) + + def evaluate_cstr_hess(self, x: np.ndarray) -> np.ndarray: + if self.g_hess is not None or self.h_hess is not None: + self.n_cstr_hess_evals += 1 + # TODO: only evaluate active inequality constraints + ddH = self.h_hess(x).tolist() if self.h_hess is not None else [] + ddG = self.g_hess(x).tolist() if self.g_hess is not None else [] + return np.array(ddH + ddG).reshape(self.n_cstr, self.n_var, self.n_var) + def update(self, X: np.ndarray, compute_gradient: bool = True): self.X = X primal_vars = self.primal self.Y = np.array([self.func(x) for x in primal_vars]) self.J = np.array([self.jac(x) for x in primal_vars]) if compute_gradient else None - if self._constrained: - eq = self._evaluate_constraints(primal_vars, type="eq", compute_gradient=compute_gradient) - ieq = self._evaluate_constraints(primal_vars, type="ieq", compute_gradient=compute_gradient) - cstr_value, active_indices, dH = merge_lists(eq, ieq) - 
self.cstr_value = cstr_value - self.active_indices = active_indices - self.dH = dH + cstr_value, active_indices = list(zip(*[self.evaluate_cstr(x) for x in primal_vars])) + self.cstr_value = np.array(cstr_value) + self.active_indices = np.array(active_indices) + self.cstr_grad = np.array([self.evaluate_cstr_jac(x) for x in primal_vars]) + self.cstr_hess = np.array([self.evaluate_cstr_hess(x) for x in primal_vars]) def update_one(self, x: np.ndarray, k: int): + primal_vars = x[: self.n_var] self.X[k] = x - x = np.atleast_2d(x) - primal_vars = x[:, : self.n_var] - self.Y[k] = self.func(primal_vars[0]) - self.J[k] = self.jac(primal_vars[0]) - if self._constrained: - eq = self._evaluate_constraints(primal_vars, type="eq", compute_gradient=True) - ieq = self._evaluate_constraints(primal_vars, type="ieq", compute_gradient=True) - cstr_value, active_indices, dH = merge_lists(eq, ieq) + self.Y[k] = self.func(primal_vars) + self.J[k] = self.jac(primal_vars) + cstr_value, active_indices = self.evaluate_cstr(primal_vars) + cstr_grad = self.evaluate_cstr_jac(primal_vars) + cstr_hess = self.evaluate_cstr_hess(primal_vars) + if cstr_value is not None: self.cstr_value[k] = cstr_value self.active_indices[k] = active_indices - self.dH[k] = dH + if cstr_grad is not None: + self.cstr_grad[k] = cstr_grad + if cstr_hess is not None: + self.cstr_hess[k] = cstr_hess + + def is_feasible(self) -> np.ndarray: + # TODO: should the active ieq cstr be considered infeasible? + ieq_active_idx = self.active_indices[:, self.n_eq :] + eq_feasible = np.all(np.isclose(self.H, 0, atol=1e-4, rtol=0), axis=1) + ieq_feasible = ~np.any(ieq_active_idx, axis=1) + return np.bitwise_and(eq_feasible, ieq_feasible) @property def primal(self) -> np.ndarray: @@ -84,18 +127,19 @@ def G(self) -> np.ndarray: """Inequality constraint values""" return self.cstr_value[:, self.n_eq :] - def _evaluate_constraints( - self, - primal_vars: np.ndarray, - type: str = "eq", - compute_gradient: bool = True, - ): - N = len(primal_vars) - func = self.h if type == "eq" else self.g - jac = self.h_jac if type == "eq" else self.g_jac - value = np.array([func(x) for x in primal_vars]).reshape(N, -1) - active_indices = [[True] * self.n_eq] * N if type == "eq" else [v >= -1e-4 for v in value] - active_indices = np.array(active_indices) - if compute_gradient: - dH = np.array([jac(x).reshape(-1, self.n_var) for x in primal_vars]) - return (value, active_indices, dH) if compute_gradient else (value, active_indices) + def __getitem__(self, indices: np.ndarray) -> Self: + """Slicing the state class + + Args: + indices (np.ndarray): indices to select + """ + obj = deepcopy(self) + obj.n_jac_evals = 0 + obj.X = obj.X[indices] + obj.Y = obj.Y[indices] + obj.J = obj.J[indices] + if self._constrained: + obj.cstr_value = obj.cstr_value[indices] + obj.active_indices = obj.active_indices[indices] + obj.cstr_grad = obj.cstr_grad[indices] + return obj diff --git a/hvd/hypervolume_derivatives.py b/hvd/hypervolume_derivatives.py index e45c5e3..66b3404 100644 --- a/hvd/hypervolume_derivatives.py +++ b/hvd/hypervolume_derivatives.py @@ -1,6 +1,6 @@ from typing import Dict, List, Tuple, Union -# TODO: remove dependencies on `autograd` +# TODO: remove dependencies on `autograd` and move to jax import autograd.numpy as np import numpy as np from autograd import hessian, jacobian @@ -9,7 +9,7 @@ from .hypervolume import hypervolume from .utils import non_domin_sort -__author__ = ["Hao Wang"] +__author__ = "Hao Wang" def HVY(n_objective: int, ref: np.ndarray) -> callable: @@ -36,8 
+36,8 @@ class HypervolumeDerivatives: def __init__( self, - n_decision_var: int, - n_objective: int, + n_var: int, + n_obj: int, ref: Union[np.ndarray, List[List]], func: callable = None, jac: callable = None, @@ -73,8 +73,8 @@ def __init__( if hessian is None: hessian = lambda x: np.zeros((len(x), len(x), len(x))) - self.n_decision_var = int(n_decision_var) - self.n_objective = int(n_objective) + self.n_var = int(n_var) + self.n_obj = int(n_obj) self.func = func if minimization else lambda x: -1 * func(x) self.jac = jac if minimization else lambda x: -1 * jac(x) self.hessian = hessian if minimization else lambda x: -1 * hessian(x) @@ -103,7 +103,7 @@ def objective_points(self, points): if not isinstance(points, np.ndarray): points = np.asarray(points) self._objective_points = points - assert self.n_objective == self._objective_points.shape[1] + assert self.n_obj == self._objective_points.shape[1] if self._objective_points.shape[1] == 1: self._nondominated_indices = np.array([np.argmin(self._objective_points.ravel())]) @@ -114,45 +114,58 @@ def objective_points(self, points): def _check_X(self, X: np.ndarray): X = np.atleast_2d(X) - if X.shape[1] != self.n_decision_var: - X = X.reshape(-1, self.n_decision_var) + if X.shape[1] != self.n_var: + X = X.reshape(-1, self.n_var) return X def _check_Y(self, Y: np.ndarray): Y = np.atleast_2d(Y) - if Y.shape[1] != self.n_objective: - Y = Y.reshape(-1, self.n_decision_var) + if Y.shape[1] != self.n_obj: + Y = Y.reshape(-1, self.n_var) return Y - def HV(self, X: np.ndarray) -> float: - X = self._check_X(X) - Y = np.array([self.func(x) for x in X]) + def compute(self, X: np.ndarray = None, Y: np.ndarray = None) -> float: + if Y is None: + assert X is not None + assert self.func is not None + X = self._check_X(X) + Y = np.array([self.func(x) for x in X]) return hypervolume(Y, self.ref) - def project( - self, axis: int, i: int, pareto_front: np.ndarray = None, ref: np.ndarray = None - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: - """projecting the Pareto front along `axis` with respect to the i-th point""" - pareto_front = self.objective_points if pareto_front is None else pareto_front - ref = self.ref if ref is None else ref - y = pareto_front[i, :] - # projection: drop the `axis`-th dimension - y_ = np.delete(y, obj=axis) - ref_ = np.delete(ref, obj=axis) - idx = np.nonzero(pareto_front[:, axis] < y[axis])[0] - pareto_front_ = np.delete(pareto_front[idx, :], obj=axis, axis=1) - if len(pareto_front_) != 0: - if pareto_front_.shape[1] == 1: - pareto_indices = np.array([np.argmin(pareto_front_.ravel())]) - else: - pareto_indices = non_domin_sort(pareto_front_, only_front_indices=True)[0] - # NOTE: implement a fast algorithm to get the non dominated subset - # pareto_indices = get_non_dominated(pareto_front_, return_index=True) - pareto_front_ = pareto_front_[pareto_indices] - idx = idx[pareto_indices] - return y_, pareto_front_, ref_, idx + def compute_derivatives( + self, + X: np.ndarray, + Y: np.ndarray = None, + compute_hessian: bool = True, + YdX: np.ndarray = None, + ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """compute the derivatives of the inverted generational distance^p + + Args: + X (np.ndarray): the decision points of shape (N, dim). + Y (np.ndarray, optional): the objective points of shape (N, n_objective). Defaults to None. + compute_hessian (bool, optional): whether the Hessian is computed. Defaults to True. + jacobian (np.ndarray, optional): Jacobian of the objective function at `X`. Defaults to None. 
- def _compute_gradient(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: + Returns: + Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + if `compute_hessian` = True, it returns (gradient, Hessian) + otherwise, it returns (gradient, ) + """ + if compute_hessian: + out = self._compute_hessian(X, Y, YdX) + HVdX, HVdX2 = out["HVdX"], out["HVdX2"] + else: + HVdX = self._compute_gradient(X, Y, YdX)["HVdX"] + HVdX = HVdX.reshape(len(X), -1) + return (HVdX, HVdX2) if compute_hessian else HVdX + + def _compute_gradient( + self, + X: np.ndarray, + Y: np.ndarray = None, + YdX: np.ndarray = None, + ) -> Dict[str, np.ndarray]: """compute the hypervolume gradient using analytical expressions Parameters @@ -177,17 +190,19 @@ def _compute_gradient(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np } """ X = self._check_X(X) - Y, YdX, YdX2 = self._compute_objective_derivatives(X, Y) + Y, YdX = self._compute_objective_derivatives(X, Y, YdX) self.objective_points = Y - HVdY = np.zeros((self.N, self.n_objective)) + HVdY = np.zeros((self.N, self.n_obj)) HVdY[self._nondominated_indices] = self.hypervolume_dY( self.objective_points[self._nondominated_indices], self.ref ) HVdY = HVdY.reshape(1, -1)[0] HVdX = HVdY @ YdX - return dict(HVdX=HVdX, HVdY=HVdY, YdX=YdX, YdX2=YdX2) + return dict(HVdX=HVdX, HVdY=HVdY, YdX=YdX) - def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: + def _compute_hessian( + self, X: np.ndarray, Y: np.ndarray = None, YdX: np.ndarray = None + ) -> Dict[str, np.ndarray]: """compute the hypervolume gradient and Hessian matrix using analytical expressions Parameters @@ -211,15 +226,17 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: } """ X = self._check_X(X) - res = self._compute_gradient(X, Y) - HVdY, HVdX, YdX, YdX2 = res["HVdY"], res["HVdX"], res["YdX"], res["YdX2"] - HVdY2 = np.zeros((self.N * self.n_objective, self.N * self.n_objective)) + # TODO: fix passing the input arugment `YdX` + res = self._compute_gradient(X, Y, YdX) + Y, YdX, YdX2 = self._compute_objective_derivatives(X, Y, YdX, compute_hessian=True) + HVdY, HVdX = res["HVdY"], res["HVdX"] + HVdY2 = np.zeros((self.N * self.n_obj, self.N * self.n_obj)) for i in range(self.N): if i in self._dominated_indices: # if the point is dominated continue - for k in range(self.n_objective): + for k in range(self.n_obj): # project along `axis`; `*_` indicates variables in the projected subspace - y_, pareto_front_, ref_, proj_idx = self.project(axis=k, i=i) + y_, pareto_front_, ref_, proj_idx = self._project(axis=k, i=i) # partial derivatives ∂(∂HV/∂y_k^i)/∂y^i # of shape (1, dim), where the k-th element is zero Y_ = np.vstack([y_, pareto_front_]) @@ -232,8 +249,8 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: idx = np.where(pareto_indices == 0)[0] out = self.hypervolume_dY(Y_[pareto_indices], ref_)[idx] - rows = slice(i * self.n_objective, (i + 1) * self.n_objective) - HVdY2[rows, i * self.n_objective + k] = np.insert(-1.0 * out, k, 0) + rows = slice(i * self.n_obj, (i + 1) * self.n_obj) + HVdY2[rows, i * self.n_obj + k] = np.insert(-1.0 * out, k, 0) # partial derivatives ∂(∂HV/∂y_k^i)/∂y^{-i} # of shape (len(proj_idx), dim), where the k-th element is zero # ∂HV/∂y_k^i is the hypervolume improvement of `x_` w.r.t. 
`pareto_front_` @@ -244,8 +261,8 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: # hypervolume improvement of points in `pareto_front_` decreases ∂HV/∂y_k^i out = np.insert(out, k, 0, axis=1) for s, j in enumerate(proj_idx): - rows = slice(j * self.n_objective, (j + 1) * self.n_objective) - HVdY2[rows, i * self.n_objective + k] = out[s] + rows = slice(j * self.n_obj, (j + 1) * self.n_obj) + HVdY2[rows, i * self.n_obj + k] = out[s] # TODO: use sparse matrix multiplication here HVdX2 = YdX.T @ HVdY2 @ YdX + np.einsum("...i,i...", HVdY, YdX2) @@ -258,6 +275,29 @@ def compute(self, X: np.ndarray, Y: np.ndarray = None) -> Dict[str, np.ndarray]: HVdY2=HVdY2, ) + def _project( + self, axis: int, i: int, pareto_front: np.ndarray = None, ref: np.ndarray = None + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """projecting the Pareto front along `axis` with respect to the i-th point""" + pareto_front = self.objective_points if pareto_front is None else pareto_front + ref = self.ref if ref is None else ref + y = pareto_front[i, :] + # projection: drop the `axis`-th dimension + y_ = np.delete(y, obj=axis) + ref_ = np.delete(ref, obj=axis) + idx = np.nonzero(pareto_front[:, axis] < y[axis])[0] + pareto_front_ = np.delete(pareto_front[idx, :], obj=axis, axis=1) + if len(pareto_front_) != 0: + if pareto_front_.shape[1] == 1: + pareto_indices = np.array([np.argmin(pareto_front_.ravel())]) + else: + pareto_indices = non_domin_sort(pareto_front_, only_front_indices=True)[0] + # NOTE: implement a fast algorithm to get the non dominated subset + # pareto_indices = get_non_dominated(pareto_front_, return_index=True) + pareto_front_ = pareto_front_[pareto_indices] + idx = idx[pareto_indices] + return y_, pareto_front_, ref_, idx + def compute_automatic_differentiation(self, X: np.ndarray) -> Dict[str, np.ndarray]: """compute the hypervolume gradient and Hessian matrix using automatic differentiation @@ -282,9 +322,9 @@ def compute_automatic_differentiation(self, X: np.ndarray) -> Dict[str, np.ndarr } """ if self._HV_Jac is None: - self._HV_Jac = jacobian(HVY(self.n_objective, self.ref)) + self._HV_Jac = jacobian(HVY(self.n_obj, self.ref)) if self._HV_Hessian is None: - self._HV_Hessian = hessian(HVY(self.n_objective, self.ref)) + self._HV_Hessian = hessian(HVY(self.n_obj, self.ref)) X = self._check_X(X) Y, YdX, YdX2 = self._compute_objective_derivatives(X) @@ -329,31 +369,37 @@ def hypervolume_dY(self, pareto_front: np.ndarray, ref: np.ndarray) -> np.ndarra # higher dimensional cases: recursive computation for i in range(N): for k in range(dim): - y_, pareto_front_, ref_, _ = self.project(k, i, pareto_front, ref) + y_, pareto_front_, ref_, _ = self._project(k, i, pareto_front, ref) # NOTE: `-1.0` -> since we assume a minimization problem HVdY[i, k] = -1.0 * hypervolume_improvement(y_, pareto_front_, ref_) return HVdY def _compute_objective_derivatives( - self, X: np.ndarray, Y: np.ndarray = None + self, + X: np.ndarray, + Y: np.ndarray = None, + YdX: np.ndarray = None, + compute_hessian: np.ndarray = False, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """compute the objective function value, the Jacobian, and Hessian tensor""" if not isinstance(X, np.ndarray): X = np.asarray(X) - if X.shape[1] != self.n_decision_var: + if X.shape[1] != self.n_var: X = X.T self.N = X.shape[0] # number of points if Y is None: # do not evaluate the function when `Y` is provided Y = np.array([self.func(x) for x in X]) # `(N, n_objective)` # Jacobians - YdX = 
np.array([self.jac(x) for x in X]) # `(N, n_objective, n_decision_var)` + # `(N, n_objective, n_decision_var)` + YdX = np.array([self.jac(x) for x in X]) if YdX is None else YdX.copy() YdX = block_diag(*YdX) # `(N * n_objective, N * n_decision_var)` grouped by points # Hessians - _YdX2 = np.array([self.hessian(x) for x in X]) # `(N, n_objective, n_decision_var, n_decision_var)` - YdX2 = np.zeros( - (self.N * self.n_objective, self.N * self.n_decision_var, self.N * self.n_decision_var) - ) - for i in range(self.N): - idx = slice(i * self.n_decision_var, (i + 1) * self.n_decision_var) - YdX2[i * self.n_objective : (i + 1) * self.n_objective, idx, idx] = _YdX2[i, ...] - return Y, YdX, YdX2 + if compute_hessian: + _YdX2 = np.array( + [self.hessian(x) for x in X] + ) # `(N, n_objective, n_decision_var, n_decision_var)` + YdX2 = np.zeros((self.N * self.n_obj, self.N * self.n_var, self.N * self.n_var)) + for i in range(self.N): + idx = slice(i * self.n_var, (i + 1) * self.n_var) + YdX2[i * self.n_obj : (i + 1) * self.n_obj, idx, idx] = _YdX2[i, ...] + return (Y, YdX, YdX2) if compute_hessian else (Y, YdX) diff --git a/hvd/mmd.py b/hvd/mmd.py index 502d7d9..f8fc588 100644 --- a/hvd/mmd.py +++ b/hvd/mmd.py @@ -253,9 +253,11 @@ def compute_derivatives( if `compute_hessian` = True, it returns (gradient, Hessian) otherwise, it returns (gradient, ) """ - grad = self.compute_gradient(X, Y, jacobian)["MMDdX"] if compute_hessian: - hessian = self.compute_hessian(X, Y)["MMDdX2"] + out = self.compute_hessian(X, Y) + grad, hessian = out["MMDdX"], out["MMDdX2"] + else: + grad = self.compute_gradient(X, Y, jacobian)["MMDdX"] return (grad, hessian) if compute_hessian else grad def compute_gradient( diff --git a/hvd/mmd_newton.py b/hvd/mmd_newton.py index 84ba55c..f816193 100644 --- a/hvd/mmd_newton.py +++ b/hvd/mmd_newton.py @@ -99,7 +99,7 @@ def __init__( self.xu: np.ndarray = xu self.ref: ClusteredReferenceSet = ref # TODO: we should pass ref to the indicator directly self._check_constraints(h, g) - self.state: State = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h=h, h_jac=h_jac, g=g, g_jac=g_jac) # TODO: move indicator out of this class self.indicator = MMDMatching( self.dim_p, self.n_obj, self.ref, func, jac, hessian, theta=1.0 / N, beta=0.2 @@ -235,7 +235,7 @@ def _compute_R( if self._constrained: R = np.zeros((state.N, self.dim)) # the root-finding problem func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - cstr_value, idx, dH = state.cstr_value, state.active_indices, state.dH + cstr_value, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i, idx, :] for i, idx in enumerate(idx)] # only take Jacobian of the active constraints active_indices = np.c_[active_indices, idx] for i, k in enumerate(active_indices): @@ -298,7 +298,7 @@ def _backtracking_line_search( def phi_func(alpha, i): state = deepcopy(self.state) - x = state.X[i] + x = state.X[i].copy() x += alpha * step[i] state.update_one(x, i) self.indicator.re_match = False diff --git a/hvd/newton.py b/hvd/newton.py index b1f91aa..b961985 100644 --- a/hvd/newton.py +++ b/hvd/newton.py @@ -10,7 +10,6 @@ from .base import State from .delta_p import GenerationalDistance, InvertedGenerationalDistance -from .hypervolume import hypervolume from .hypervolume_derivatives import HypervolumeDerivatives from .utils import compute_chim, get_logger, non_domin_sort, precondition_hessian, set_bounds @@ -72,10 +71,20 @@ def __init__( self.xl: 
np.ndarray = xl self.xu: np.ndarray = xu self.ref: np.ndarray = ref - self.h_hessian: Callable = h_hessian - self.g_hessian: Callable = g_hessian self._check_constraints(h, g) - self.state: State = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State( + self.dim_p, + self.n_eq, + self.n_ieq, + func, + jac, + h=h, + h_jac=h_jac, + h_hess=h_hessian, + g=g, + g_jac=g_jac, + g_hess=g_hessian, + ) self.indicator = HypervolumeDerivatives(self.dim_p, self.n_obj, ref, func, jac, hessian) self._initialize(X0) self._set_logging(verbose) @@ -128,15 +137,11 @@ def _set_logging(self, verbose: bool): self.curr_indicator_value: float = None self.history_Y: List[np.ndarray] = [] self.history_X: List[np.ndarray] = [] - self.history_HV: List[float] = [] - # self.history_indicator_value: List[float] = [] + self.history_indicator_value: List[float] = [] self.history_R_norm: List[float] = [] self.history_metrics: Dict[str, List] = defaultdict(list) self.history_medoids: Dict[int, List] = defaultdict(list) self.logger: logging.Logger = get_logger(logger_id=f"{self.__class__.__name__}", console=self.verbose) - self._delta_X: float = np.inf - self._delta_Y: float = np.inf - self._delta_HV: float = np.inf @property def xl(self): @@ -158,32 +163,7 @@ def run(self) -> Tuple[np.ndarray, np.ndarray, Dict]: while not self.terminate(): self.newton_iteration() self.log() - return self._get_primal_dual(self.X)[0], self.Y, self.stop_dict - - def _precondition_hessian(self, H: np.ndarray) -> np.ndarray: - """Precondition the Hessian matrix to make sure it is negative definite - - Args: - H (np.ndarray): the Hessian matrix - - Returns: - np.ndarray: the preconditioned Hessian - """ - # pre-condition the Hessian - beta = 1e-6 - v = np.min(np.diag(-H)) - tau = 0 if v > 0 else -v + beta - I = np.eye(H.shape[0]) - for _ in range(35): - try: - cholesky(-H + tau * I, lower=True) - break - except: - # NOTE: the multiplier is not working for Eq1IDTLZ3.. 
Otherwise, it takes 1.5 - tau = max(1.5 * tau, beta) - else: - self.logger.warn("Pre-conditioning the HV Hessian failed") - return H - tau * I + return self.state.primal, self.state.Y, self.stop_dict def _compute_R( self, state: State, grad: np.ndarray = None @@ -195,38 +175,33 @@ def _compute_R( the Jacobian of the equality constraints, and the indices of the active primal-dual variables """ - primal_vars, dual_vars = state.primal, state.dual if grad is None: - grad = self.indicator._compute_gradient(X=primal_vars, Y=state.Y, jacobian=state.J) + grad = self.indicator.compute_derivatives(state.primal, state.Y, False, state.J) # the unconstrained case R, dH, active_indices = grad, None, np.array([[True] * state.n_var] * state.N) if self._constrained: R = np.zeros((state.N, self.dim)) # the root-finding problem func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - cstr_value, idx, dH = state.cstr_value, state.active_indices, state.dH + cstr_value, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i, idx, :] for i, idx in enumerate(idx)] # only take Jacobian of the active constraints active_indices = np.c_[active_indices, idx] for i, k in enumerate(active_indices): - R[i, k] = np.r_[func(grad[i], dual_vars[i, idx[i]], dH[i]), cstr_value[i, idx[i]]] + R[i, k] = np.r_[func(grad[i], state.dual[i, idx[i]], dH[i]), cstr_value[i, idx[i]]] return R, dH, active_indices - def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]: - primal_vars, dual_vars = self.state.primal, self.state.dual - grad, Hessian = self.indicator.compute(X=primal_vars, Y=dual_vars, jacobian=self.state.J) - if self.preconditioning: # sometimes the Hessian is not NSD + def _compute_netwon_step(self, state: State) -> Tuple[np.ndarray, np.ndarray]: + grad, Hessian = self.indicator.compute_derivatives(X=state.primal, Y=state.Y, YdX=state.J) + R, H, active_indices = self._compute_R(state, grad=grad) + # sometimes the Hessian is not NSD + if self.preconditioning: Hessian = self._precondition_hessian(Hessian) - R, dH, active_indices = self._compute_R(self.state, grad=grad) - DR = Hessian - idx = active_indices[:, self.dim_p :] + DR, idx = Hessian, active_indices[:, self.dim_p :] if self._constrained: - dH = block_diag(*dH) # (N * p, N * dim), `p` is the number of active constraints - ddH = block_diag(*[self.state.ddH[i, k, ...] 
* dual_vars[i, k] for i, k in enumerate(idx)]) - # ddH = block_diag( - # *[np.einsum("ijk,i->jk", self._h_hessian(x), dual_vars[i]) for i, x in enumerate(primal_vars)] - # *[(self.g_hessian(x) * dual_vars[i, idx[i]])[0] for i, x in enumerate(primal_vars)] - # ) # (N * dim, mu * dim) - Z = np.zeros((len(dH), len(dH))) - DR = np.r_[np.c_[DR + ddH, dH.T], np.c_[dH, Z]] + H = block_diag(*H) # (N * p, N * dim), `p` is the number of active constraints + B = state.cstr_hess + M = block_diag(*[np.einsum("i...,i", B[i, k], state.dual[i, k]) for i, k in enumerate(idx)]) + Z = np.zeros((len(H), len(H))) + DR = np.r_[np.c_[DR + M, H.T], np.c_[H, Z]] # the vector-format of R R_ = np.r_[R[:, : self.dim_p].reshape(-1, 1), R[:, self.dim_p :][idx].reshape(-1, 1)] with warnings.catch_warnings(): @@ -239,92 +214,171 @@ def _compute_netwon_step(self) -> Tuple[np.ndarray, np.ndarray]: newton_step = Nd_vector_to_matrix(newton_step_.ravel(), self.N, self.dim, self.dim_p, active_indices) return newton_step, R + def _compute_indicator_value(self): + self.curr_indicator_value = self.indicator.compute(Y=self.state.Y) + def newton_iteration(self): - self._check_XY() - self.step = np.zeros((self.N, self.dim)) - self.step_size = np.ones(self.N) - self.G = np.zeros((self.N, self.dim)) + # check for anomalies in `X` and `Y` + self._check_population() + # first compute the current indicator value + self._compute_indicator_value() + self._nondominated_idx = non_domin_sort(self.state.Y, only_front_indices=True)[0] # partition the approximation set to by feasibility - self._nondominated_idx = non_domin_sort(self.Y, only_front_indices=True)[0] - if self.h is None: - feasible_mask = np.array([True] * self.N) - else: - eq_cstr = np.array([self.h(_) for _ in self._get_primal_dual(self.X)[0]]).reshape(self.N, -1) - feasible_mask = np.all(np.isclose(eq_cstr, 0, atol=1e-4, rtol=0), axis=1) - + feasible_mask = self.state.is_feasible() feasible_idx = np.nonzero(feasible_mask)[0] - dominated_idx = list((set(range(self.N)) - set(self._nondominated_idx) - set(feasible_idx))) - if np.any(feasible_mask): + infeasible_idx = np.nonzero(~feasible_mask)[0] + partitions = {0: np.array(range(self.N))} + if len(feasible_idx) > 0: # non-dominatd sorting of the feasible points - partitions = non_domin_sort(self.Y[feasible_mask], only_front_indices=True) + partitions = non_domin_sort(self.state.Y[feasible_idx], only_front_indices=True) partitions = {k: feasible_idx[v] for k, v in partitions.items()} - partitions.update({0: np.sort(np.r_[partitions[0], np.nonzero(~feasible_mask)[0]])}) - else: - partitions = {0: np.array(range(self.N))} + # add all infeasible points to the first dominance layer + partitions.update({0: np.sort(np.r_[partitions[0], infeasible_idx])}) # compute the Newton direction for each partition - for _, idx in partitions.items(): - out = self._compute_netwon_step(X=self.X[idx], Y=self.Y[idx]) - self.step[idx, :] = out["step"] - self.G[idx, :] = out["G"] + self.step = np.zeros((self.N, self.dim)) + self.step_size = np.ones(self.N) + self.R = np.zeros((self.N, self.dim)) + for i, idx in partitions.items(): + # compute Newton step + newton_step, R = self._compute_netwon_step(self.state[idx]) + self.step[idx, :] = newton_step + self.R[idx, :] = R # backtracking line search with Armijo's condition for each layer - if _ == 0 and len(dominated_idx) > 0: # for the first layer - idx_ = list(set(idx) - set(dominated_idx)) - for k in dominated_idx: # for dominated and infeasible points - self.step_size[k] = 
self._line_search_dominated(self.X[[k]], self.step[[k]]) - self.step_size[idx_] = self._line_search(self.X[idx_], self.step[idx_], G=self.G[idx_]) - else: # for all other layers - self.step_size[idx] = self._line_search(self.X[idx], self.step[idx], G=self.G[idx]) - - self.X += self.step_size.reshape(-1, 1) * self.step - # evaluation - self.Y = np.array([self.func(x) for x in self._get_primal_dual(self.X)[0]]) + idx_ = list(set(idx) - set(infeasible_idx)) if i == 0 else idx + self.step_size[idx_] = self._line_search(self.state[idx_], self.step[idx_], R=self.R[idx_]) + # TODO: unify `_line_search` and `_line_search_dominated` + # for k in infeasible_idx: # for dominated and infeasible points + # self.step_size[k] = self._line_search(self.state[infeasible_idx], self.step[infeasible_idx], R=self.R[infeasible_idx]) + # TODO: this part should be tested + self.step_size[infeasible_idx] = self._line_search( + self.state[infeasible_idx], self.step[infeasible_idx], R=self.R[infeasible_idx] + ) + # Newton iteration and evaluation + self.state.update(self.state.X + self.step_size.reshape(-1, 1) * self.step) + + def log(self): self.iter_count += 1 + self.history_X += [self.state.primal.copy()] + self.history_Y += [self.state.Y.copy()] + self.history_indicator_value += [self.curr_indicator_value] + self.history_R_norm += [np.median(np.linalg.norm(self.R, axis=1))] + for name, func in self.metrics.items(): # compute the performance metrics + self.history_metrics[name].append(func.compute(Y=self.state.Y)) + if self.verbose: + self.logger.info(f"iteration {self.iter_count} ---") + self.logger.info(f"{self.indicator.__class__.__name__}: {self.curr_indicator_value}") + self.logger.info(f"step size: {self.step_size.ravel()}") + self.logger.info(f"R norm: {self.history_R_norm[-1]}") + self.logger.info(f"#non-dominated: {len(self._nondominated_idx)}") + + def terminate(self) -> bool: + if self.iter_count >= self.max_iters: + self.stop_dict["iter_count"] = self.iter_count + + return bool(self.stop_dict) + + def _precondition_hessian(self, H: np.ndarray) -> np.ndarray: + """Precondition the Hessian matrix to make sure it is negative definite + + Args: + H (np.ndarray): the Hessian matrix + + Returns: + np.ndarray: the preconditioned Hessian + """ + # TODO: remove this function + # pre-condition the Hessian + beta = 1e-6 + v = np.min(np.diag(-H)) + tau = 0 if v > 0 else -v + beta + I = np.eye(H.shape[0]) + for _ in range(35): + try: + cholesky(-H + tau * I, lower=True) + break + except: + # NOTE: the multiplier is not working for Eq1IDTLZ3.. Otherwise, it takes 1.5 + tau = max(1.5 * tau, beta) + else: + self.logger.warn("Pre-conditioning the HV Hessian failed") + return H - tau * I - def _line_search(self, X: np.ndarray, step: np.ndarray, G: np.ndarray) -> float: + def _line_search( + self, state: State, step: np.ndarray, R: np.ndarray, max_step_size: float = None + ) -> float: """backtracking line search with Armijo's condition""" - # TODO: ad-hoc! 
to solve this in the further using a high precision numerical library - # NOTE: when the step length is close to numpy's numerical resolution, it makes no sense to perform - # the step-size control - if np.any(np.isclose(np.median(step), np.finfo(np.double).resolution)): + c1 = 1e-4 + if np.all(np.isclose(step, 0)): return 1 - c = 1e-5 - N = len(X) - primal_vars = self._get_primal_dual(X)[0] - normal_vectors = np.c_[np.eye(self.dim_p * N), -1 * np.eye(self.dim_p * N)] - # calculate the maximal step-size - dist = np.r_[ - np.abs(primal_vars.ravel() - np.tile(self.xl, N)), - np.abs(np.tile(self.xu, N) - primal_vars.ravel()), - ] - v = step[:, : self.dim_p].ravel() @ normal_vectors - alpha = min(1, 0.25 * np.min(dist[v < 0] / np.abs(v[v < 0]))) + def phi_func(alpha): + state_ = deepcopy(state) + state_.update(state.X + alpha * step) + R = self._compute_R(state_)[0] + return np.linalg.norm(R) + step_size = 1 if max_step_size is None else max_step_size + phi = [np.linalg.norm(R)] + s = [0, step_size] for _ in range(6): - X_ = X + alpha * step - if self.h is None: - HV = self.indicator.HV(X) - HV_ = self.indicator.HV(X_) - inc = np.inner(G.ravel(), step.ravel()) - cond = HV_ - HV >= c * alpha * inc - else: - G_ = self._compute_R(X_) - cond = np.linalg.norm(G_) <= (1 - c * alpha) * np.linalg.norm(G) - if cond: + phi.append(phi_func(s[-1])) + # Armijo–Goldstein condition + # when R norm is close to machine precision, it makes no sense to perform the line search + success = phi[-1] <= (1 - c1 * s[-1]) * phi[0] or np.isclose(phi[0], np.finfo(float).eps) + if success: break else: if 11 < 2: - phi0 = HV if self.h is None else np.sum(G**2) / 2 - phi1 = HV_ if self.h is None else np.sum(G_**2) / 2 - phi0prime = inc if self.h is None else -np.sum(G**2) - alpha = -phi0prime * alpha**2 / (phi1 - phi0 - phi0prime * alpha) / 2 - # alpha *= tau - if 1 < 2: - alpha *= 0.5 + # cubic interpolation to compute the next step length + d1 = -phi[-2] - phi[-1] - 3 * (phi[-2] - phi[-1]) / (s[-2] - s[-1]) + d2 = np.sign(s[-1] - s[-2]) * np.sqrt(d1**2 - phi[-2] * phi[-1]) + s_ = s[-1] - (s[-1] - s[-2]) * (-phi[-1] + d2 - d1) / (-phi[-1] + phi[-2] + 2 * d2) + s_ = s[-1] * 0.5 if np.isnan(s_) else np.clip(s_, 0.4 * s[-1], 0.6 * s[-1]) + s.append(s_) + else: + s.append(s[-1] * 0.5) else: - self.logger.warn("Armijo's backtracking line search failed") - return alpha + self.logger.warn("backtracking line search failed") + step_size = s[-1] + return step_size + + # c = 1e-5 + # N = len(X) + # primal_vars = self._get_primal_dual(X)[0] + # normal_vectors = np.c_[np.eye(self.dim_p * N), -1 * np.eye(self.dim_p * N)] + # # calculate the maximal step-size + # dist = np.r_[ + # np.abs(primal_vars.ravel() - np.tile(self.xl, N)), + # np.abs(np.tile(self.xu, N) - primal_vars.ravel()), + # ] + # v = step[:, : self.dim_p].ravel() @ normal_vectors + # alpha = min(1, 0.25 * np.min(dist[v < 0] / np.abs(v[v < 0]))) + + # for _ in range(6): + # X_ = X + alpha * step + # if self.h is None: + # HV = self.indicator.compute(X) + # HV_ = self.indicator.compute(X_) + # inc = np.inner(R.ravel(), step.ravel()) + # cond = HV_ - HV >= c * alpha * inc + # else: + # G_ = self._compute_R(X_) + # cond = np.linalg.norm(G_) <= (1 - c * alpha) * np.linalg.norm(R) + # if cond: + # break + # else: + # if 11 < 2: + # phi0 = HV if self.h is None else np.sum(R**2) / 2 + # phi1 = HV_ if self.h is None else np.sum(G_**2) / 2 + # phi0prime = inc if self.h is None else -np.sum(R**2) + # alpha = -phi0prime * alpha**2 / (phi1 - phi0 - phi0prime * alpha) / 2 + # # alpha *= 
tau + # if 1 < 2: + # alpha *= 0.5 + # else: + # self.logger.warn("Armijo's backtracking line search failed") + # return alpha def _line_search_dominated(self, X: np.ndarray, step: np.ndarray) -> float: """backtracking line search with Armijo's condition""" @@ -363,9 +417,9 @@ def _line_search_dominated(self, X: np.ndarray, step: np.ndarray) -> float: self.logger.warn("Armijo's backtracking line search failed") return alpha - def _check_XY(self): + def _check_population(self): # get unique points: if some points converge to the same location - primal_vars = self.X[:, : self.dim_p] + primal_vars = self.state.primal D = cdist(primal_vars, primal_vars) drop_idx_X = set([]) for i in range(self.N): @@ -373,54 +427,17 @@ def _check_XY(self): drop_idx_X |= set(np.nonzero(np.isclose(D[i, :], 0, rtol=self.eps))[0]) - set([i]) # get rid of weakly-dominated points - # TODO: Ad-hoc solution! check if this is still needed - # since the hypervolume indicator module is upgraded drop_idx_Y = set([]) # TODO: Ad-hoc solution! check if this is still needed - if self.problem_name is not None and self.problem_name not in ("Eq1DTLZ4", "Eq1IDTLZ4"): - for i in range(self.N): - if i not in drop_idx_Y: - drop_idx_Y |= set(np.nonzero(np.any(np.isclose(self.Y[i, :], self.Y), axis=1))[0]) - set( - [i] - ) + # if self.problem_name is not None and self.problem_name not in ("Eq1DTLZ4", "Eq1IDTLZ4"): + # for i in range(self.N): + # if i not in drop_idx_Y: + # drop_idx_Y |= set(np.nonzero(np.any(np.isclose(self.Y[i, :], self.Y), axis=1))[0]) - set( + # [i] + # ) idx = list(set(range(self.N)) - (drop_idx_X | drop_idx_Y)) - self.N = len(idx) - self.X = self.X[idx, :] - self.Y = self.Y[idx, :] - - def log(self): - HV = hypervolume(self.Y, self.ref) - self.history_Y += [self.Y.copy()] - self.history_X += [self._get_primal_dual(self.X.copy())[0]] - self.history_HV += [HV] - - if self.verbose: - self.logger.info(f"iteration {self.iter_count} ---") - self.logger.info(f"HV: {HV}") - # self.logger.info(f"step size: {self.step_size}") - self.logger.info(f"#non-dominated: {len(self._nondominated_idx)}") - - if self.iter_count >= 1: - try: - self._delta_X = np.mean( - np.sqrt(np.sum((self.history_X[-1] - self.history_X[-2]) ** 2, axis=1)) - ) - self._delta_Y = np.mean( - np.sqrt(np.sum((self.history_Y[-1] - self.history_Y[-2]) ** 2, axis=1)) - ) - self._delta_HV = np.abs(self.history_HV[-1] - self.history_HV[-2]) - except: - pass - - if self.h is not None: - self.hist_R_norm += [np.median(np.linalg.norm(self.G[self._nondominated_idx], axis=1))] - self.logger.info(f"G norm: {self.hist_R_norm[-1]}") - - def terminate(self) -> bool: - if self.iter_count >= self.max_iters: - self.stop_dict["iter_count"] = self.iter_count - - return bool(self.stop_dict) + self.state = self.state[idx] + self.N = self.state.N class DpN: @@ -493,7 +510,7 @@ def __init__( self.xl = xl self.xu = xu self._check_constraints(h, g) - self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h, h_jac, g, g_jac) + self.state = State(self.dim_p, self.n_eq, self.n_ieq, func, jac, h=h, h_jac=h_jac, g=g, g_jac=g_jac) self._initialize(x0) self._set_indicator(ref, func, jac, hessian) self._set_logging(verbose) @@ -676,7 +693,7 @@ def _compute_R( dH, idx = None, None if self._constrained: func = lambda g, dual, h: g + np.einsum("j,jk->k", dual, h) - v, idx, dH = state.cstr_value, state.active_indices, state.dH + v, idx, dH = state.cstr_value, state.active_indices, state.cstr_grad dH = [dH[i][idx, :] for i, idx in enumerate(idx)] R = [np.r_[func(grad[i], dual_vars[i, 
k], dH[i]), v[i, k]] for i, k in enumerate(idx)] return R, dH, idx @@ -782,7 +799,7 @@ def _backtracking_line_search( def phi_func(alpha, i): state = deepcopy(self.state) - x = state.X[i] + x = state.X[i].copy() x += alpha * step[i] state.update_one(x, i) self.active_indicator.re_match = False diff --git a/tests/test_2D.py b/tests/test_2D.py index e776142..e713ac8 100644 --- a/tests/test_2D.py +++ b/tests/test_2D.py @@ -11,7 +11,7 @@ def test_2D_case1(): ref = np.array([11, 10]) hvh = HypervolumeDerivatives(2, 2, ref, minimization=True) - out = hvh.compute(X=np.array([(10, 1), (9.5, 3), (8, 6.5), (4, 8), (1, 9)])) + out = hvh._compute_hessian(X=np.array([(10, 1), (9.5, 3), (8, 6.5), (4, 8), (1, 9)])) assert np.all(out["HVdY"] == np.array([-2, -1, -3.5, -0.5, -1.5, -1.5, -1, -4, -1, -3])) assert np.all( out["HVdY2"] @@ -50,8 +50,8 @@ def MOP1_Hessian(x): def test_2D_case2(): ref = np.array([20, 20]) hvh = HypervolumeDerivatives( - n_decision_var=2, - n_objective=2, + n_var=2, + n_obj=2, ref=ref, func=MOP1, jac=MOP1_Jacobian, @@ -60,7 +60,7 @@ def test_2D_case2(): ) for _ in range(10): X = np.random.rand(3, 2) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(out["HVdX"], AD["HVdX"])) assert np.all(np.isclose(out["HVdX2"], AD["HVdX2"])) diff --git a/tests/test_3D.py b/tests/test_3D.py index 7d01c7d..11ad6df 100644 --- a/tests/test_3D.py +++ b/tests/test_3D.py @@ -9,7 +9,7 @@ def test_3D_case1(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[5, 1, 7], [2, 3, 10]])) + out = hvh._compute_hessian(X=np.array([[5, 1, 7], [2, 3, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -28,7 +28,7 @@ def test_3D_case1(): def test_3D_case2(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[-1, 5, 7], [2, 3, 10]])) + out = hvh._compute_hessian(X=np.array([[-1, 5, 7], [2, 3, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -47,7 +47,7 @@ def test_3D_case2(): def test_3D_case3(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([[5, 3, 7], [2, 1, 10]])) + out = hvh._compute_hessian(X=np.array([[5, 3, 7], [2, 1, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -66,7 +66,7 @@ def test_3D_case3(): def test_3D_case4(): ref = np.array([10, 13, 23]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([(8, 7, 10), (4, 11, 17), (2, 9, 21)])) + out = hvh._compute_hessian(X=np.array([(8, 7, 10), (4, 11, 17), (2, 9, 21)])) assert np.all(out["HVdY"] == np.array([-62, -26, -12, -8, -16, -8, -8, -12, -16])) assert np.all( out["HVdY2"] @@ -89,7 +89,7 @@ def test_3D_case4(): def test_3D_case5(): ref = np.array([17, 35, 7]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute( + out = hvh._compute_hessian( X=np.array([(16, 23, 1), (14, 32, 2), (12, 27, 3), (10, 21, 4), (8, 33, 5), (6.5, 31, 6)]) ) assert np.all( @@ -126,7 +126,7 @@ def test_3D_case5(): def test_3D_case6(): ref = np.array([3, 3, 3]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = hvh.compute(X=np.array([(1, 2, -1)])) + out = hvh._compute_hessian(X=np.array([(1, 2, -1)])) assert np.all( out["HVdY2"] == np.array( @@ -142,7 +142,7 @@ def test_3D_case6(): def test_with_dominated_points(): ref = np.array([9, 10, 12]) hvh = HypervolumeDerivatives(3, 3, ref, minimization=True) - out = 
hvh.compute(X=np.array([[-1, -2, 7], [2, 1, 10]])) + out = hvh._compute_hessian(X=np.array([[-1, -2, 7], [2, 1, 10]])) assert np.all( out["HVdY2"] == np.array( @@ -191,9 +191,7 @@ def MOP1_Hessian(x): return np.array([2 * np.eye(3), 2 * np.eye(3), 2 * np.eye(3)]) -hvh = HypervolumeDerivatives( - n_decision_var=3, n_objective=3, ref=ref, func=MOP1, jac=MOP1_Jacobian, hessian=MOP1_Hessian -) +hvh = HypervolumeDerivatives(n_var=3, n_obj=3, ref=ref, func=MOP1, jac=MOP1_Jacobian, hessian=MOP1_Hessian) def test_against_autograd(): @@ -201,7 +199,7 @@ def test_against_autograd(): w = np.random.rand(20, 3) w /= np.sum(w, axis=1).reshape(-1, 1) X = w @ np.vstack([c1, c2, c3]) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(AD["HVdY"], out["HVdY"])) diff --git a/tests/test_ND.py b/tests/test_ND.py index 47e7808..da39281 100644 --- a/tests/test_ND.py +++ b/tests/test_ND.py @@ -44,14 +44,14 @@ def test_4D(n_objective): ref = Y.max(axis=0) * 1.2 hvh = HypervolumeDerivatives( - n_decision_var=dim, - n_objective=n_objective, + n_var=dim, + n_obj=n_objective, ref=ref, func=func, jac=jac, hessian=hessian, ) - out = hvh.compute(X) + out = hvh._compute_hessian(X) AD = hvh.compute_automatic_differentiation(X) assert np.all(np.isclose(AD["HVdY"], out["HVdY"], atol=1e-5, rtol=1e-8))
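
Usage sketch (not part of the patch): the `HypervolumeDerivatives` constructor now takes `n_var`/`n_obj`, the old `HV` method becomes `compute`, the old `compute` (gradient + Hessian dict) becomes `_compute_hessian`, and `compute_derivatives` is the public entry point returning arrays. The toy objective, Jacobian, and Hessian below are assumptions made only for illustration; they are not defined anywhere in this patch.

    import numpy as np
    from hvd.hypervolume_derivatives import HypervolumeDerivatives

    # toy bi-objective problem (illustrative only)
    func = lambda x: np.array([np.sum((x - 1) ** 2), np.sum((x + 1) ** 2)])
    jac = lambda x: np.array([2 * (x - 1), 2 * (x + 1)])
    hessian = lambda x: np.array([2 * np.eye(len(x)), 2 * np.eye(len(x))])

    ref = np.array([20.0, 20.0])
    hvh = HypervolumeDerivatives(
        n_var=2, n_obj=2, ref=ref, func=func, jac=jac, hessian=hessian, minimization=True
    )
    X = np.random.rand(5, 2)
    hv_value = hvh.compute(X=X)                                # hypervolume value (formerly `HV`)
    HVdX = hvh.compute_derivatives(X, compute_hessian=False)   # gradient only
    HVdX, HVdX2 = hvh.compute_derivatives(X)                   # gradient and Hessian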
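
Similarly, a sketch of the reworked constraint handling in `hvd.base.State`: `evaluate_cstr` returns the stacked constraint values together with an active-constraint mask, `update` caches constraint Jacobians and Hessians (`cstr_grad`, `cstr_hess`), and `__getitem__` slices the state. The single equality constraint used here is a hypothetical example for illustration only.

    import numpy as np
    from hvd.base import State

    func = lambda x: np.array([np.sum((x - 1) ** 2), np.sum((x + 1) ** 2)])  # illustrative objectives
    jac = lambda x: np.array([2 * (x - 1), 2 * (x + 1)])
    h = lambda x: np.sum(x) - 1.0                  # hypothetical equality constraint h(x) = x1 + x2 - 1
    h_jac = lambda x: np.ones_like(x)
    h_hess = lambda x: np.zeros((len(x), len(x)))

    state = State(n_var=2, n_eq=1, n_ieq=0, func=func, jac=jac, h=h, h_jac=h_jac, h_hess=h_hess)
    X = np.c_[np.random.rand(5, 2), np.zeros(5)]   # primal variables plus one dual column per point
    state.update(X)                                # caches Y, J, cstr_value, active_indices, cstr_grad, cstr_hess
    mask = state.is_feasible()                     # per-point feasibility of the equality constraints
    subset = state[np.array([0, 1])]               # sliced copy via the new __getitem__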