Mixed-variable optimization#

This page demonstrates how to minimize mixed-variable problems. SMT Optim supports float, integer and categorical variables.

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import proj3d

# mixed-variables optimization requires the use of SMT's DesignSpace class
import smt.design_space as ds

from smt_optim import minimize

# method to fetch optimization problems
from smt_optim.benchmarks.registry import get_problem

# utility method to help with plotting 2d functions
from smt_optim.utils.plot_2d import get_plot2d_data

Unconstrained mixed-variable optimization#

The following example illustrates how to minimize the mixed-variable Branin test function. This example assumes the user is familiar with unconstrained optimization of a continuous design space. This problem has 2 continuous ‘float’ variables (\(x_1,\; x_2\)) and 2 categorical variables (\(z_1,\; z_2\)). It can be written mathematically as:

\[ h(x_1, x_2) = \frac{ \left[ 15 x_2 - \frac{5}{4 \pi^2} (15 x_1 - 5)^2 + \frac{5}{\pi}(15 x_1 - 5) - 6 \right]^2 + 10 \left(1 - \frac{1}{8\pi}\right)\cos(15 x_1 - 5) + 10 - 54.8104 }{51.9496} \]
\[\begin{split} f(x_1, x_2, z_1, z_2) = \begin{cases} h(x_1, x_2), & \text{if } (z_1, z_2) = (0,0) \\[6pt] 0.4\, h(x_1, x_2), & \text{if } (z_1, z_2) = (0,1) \\[6pt] -0.75\, h(x_1, x_2) + 3.0, & \text{if } (z_1, z_2) = (1,0) \\[6pt] -0.5\, h(x_1, x_2) + 1.4, & \text{if } (z_1, z_2) = (1,1) \end{cases} \end{split}\]

The goal is to minimize the objective function over the mixed-variable design space, which can be formulated as so:

\[\begin{split} \begin{aligned} \min_{x_1, x_2, z_1, z_2} \quad & f(x_1, x_2, z_1, z_2) \\ \text{s.t.} \quad & x_1 \in [0,1], \quad x_2 \in [0,1], \quad z_1 \in \{0,1\}, \quad z_2 \in \{0,1\}. \end{aligned} \end{split}\]

The cell below imports the test function from within SMT Optim test problems collection. The mixed-variable design space is initialized with an SMT’s DesignSpace class.

# fetches the mixed-variable test problem
problem = get_problem("MixVarBranin")

# defines the design space using SMT's DesignSpace class
design_space = ds.DesignSpace([
    ds.FloatVariable(lower=0, upper=1),
    ds.FloatVariable(lower=0, upper=1),
    ds.CategoricalVariable([0, 1]),
    ds.CategoricalVariable([0, 1]),
])

# ------- plots the test problem -------
bounds = np.array([
    [0, 1],
    [0, 1],
])

z_combinations = [
    (0, 0),
    (0, 1),
    (1, 0),
    (1, 1),
]

fig = plt.figure(figsize=(9, 7))

for i, (z1, z2) in enumerate(z_combinations, start=1):

    def wrapper(x):
        x0 = np.array([x[0], x[1], z1, z2])
        return problem.objective(x0)

    XX, YY, Z = get_plot2d_data(wrapper, bounds, 51)

    ax = fig.add_subplot(2, 2, i, projection="3d")
    surf = ax.plot_surface(XX, YY, Z, cmap="viridis", edgecolor="none")
    ax.set_title(r"$(z_1, z_2)$ = (" + str(z1) + ", " + str(z2) + ")")
    ax.set_xlabel(r"$x_1$")
    ax.set_ylabel(r"$x_2$")

    fig.colorbar(surf, ax=ax, shrink=0.7, pad=0.1)

plt.tight_layout(h_pad=2.5)
plt.show()
../_images/da1689abb0fd4297cf7db24a0b1d462799f051d41e9066ff570de4e3b0b907d5.png

Starting the optimization#

The Bayesian optimization is started with the minimize method. To define a mixed-variable design space instead of a continuous one, an SMT’s DesignSpace object must be given to the design space argument.

# starts the optimization
state = minimize(
    [problem.objective],            # list of callables (single callable for mono-fidelity)
    design_space,                   # SMT's Design Space object
    max_iter=10,
    driver_kwargs={"seed": 0}
)

# fetches the best sample
best_sample = state.get_best_sample()
print(best_sample)
          iter         budget           fmin           rscv       fidelity        gp_time       acq_time
             0              5   -6.95818e-01      0.000e+00            nan            nan            nan
             1              6   -6.95818e-01      0.000e+00              1          0.834          1.641
             2              7   -6.95818e-01      0.000e+00              1          0.646          1.568
             3              8   -8.88283e-01      0.000e+00              1          0.431          1.441
             4              9   -8.88283e-01      0.000e+00              1          0.401          1.258
             5             10   -8.88283e-01      0.000e+00              1          0.401          1.806
             6             11   -8.88283e-01      0.000e+00              1          0.608          1.322
             7             12   -9.38515e-01      0.000e+00              1          0.421          1.270
             8             13   -9.38515e-01      0.000e+00              1          0.483          1.332
             9             14   -9.38515e-01      0.000e+00              1          0.448          1.195
          iter         budget           fmin           rscv       fidelity        gp_time       acq_time
            10             15   -9.38515e-01      0.000e+00              1          0.438          1.287
======= sample data =======
x =             [0.59433983 0.         0.         0.        ]
obj =           [-0.93851473]
cstr =          []
eval_time =     [3.15470024e-05]
------- meta data -------
iter =     7
budget =     12
rscv =     0.0
===========================

Plotting the results#

The code snippet below plots the best sample, displayed with a star, against the test problem.

x_star = best_sample.x
y_star = best_sample.obj[0]
#  categorical variables
z1_star = x_star[2]
z2_star = x_star[3]

# ------- plots the best sample against the functions -------
fig = plt.figure(figsize=(9, 7))

for i, (z1, z2) in enumerate(z_combinations, start=1):

    def wrapper(x):
        x0 = np.array([x[0], x[1], z1, z2])
        return problem.objective(x0)

    XX, YY, Z = get_plot2d_data(wrapper, bounds, 51)

    ax = fig.add_subplot(2, 2, i, projection="3d")
    surf = ax.plot_surface(XX, YY, Z, cmap="viridis", edgecolor="none", zsort="min")

    ax.set_title(r"$(z_1, z_2)$ = (" + str(z1) + ", " + str(z2) + ")")
    ax.set_xlabel(r"$x_1$")
    ax.set_ylabel(r"$x_2$")
    fig.colorbar(surf, ax=ax, shrink=0.7, pad=0.1)

    if z1 == z1_star and z2 == z2_star:

        x2, y2, _ = proj3d.proj_transform(
            x_star[0], x_star[1], y_star,
            ax.get_proj()
        )

        ax.annotate(
            "★",
            (x2, y2),
            color="C3",
            fontsize=14,
            ha="center"
        )

plt.tight_layout(h_pad=2.5)
plt.show()
../_images/3b6c978429160fdb3cba94b14805104627b8265116a0755e64558eced01ec953.png

Constrained, multi-fidelity, and mixed-variable optimization#

The following example illustrates how to minimize the constrained, multi-fidelity, mixed-variable Branin test function. This example assumes the user is familiar with constrained and multi-fidelity optimization. The problem’s design space has 2 continuous ‘float’ variables and 2 categorical variables. The mixed-variable constraint function is defined as:

\[\begin{split} g(x_1, x_2, z_1, z_2) = \begin{cases} - x_1 x_2 + 0.4 & \text{if } (z_1, z_2) = (0,0), \\ - 1.5\,x_1 x_2 + 0.4 & \text{if } (z_1, z_2) = (0,1), \\ - 1.5\,x_1 x_2 + 0.2 & \text{if } (z_1, z_2) = (1,0), \\ - 1.2\,x_1 x_2 + 0.3 & \text{if } (z_1, z_2) = (1,1), \end{cases} \end{split}\]

The multi-fidelity objective and constraint functions take the following form:

\[\begin{split} \begin{aligned} f_{\mathrm{HF}}(x_1, x_2, z_1, z_2) &= f(x_1, x_2, z_1, z_2), \\ f_{\mathrm{LF}}(x_1, x_2, z_1, z_2) &= f_{\mathrm{HF}}(x_1, x_2, z_1, z_2) - \cos(0.5x_1) - x_2^3, \\ g_{\mathrm{HF}}(x_1, x_2, z_1, z_2) &= g(x_1, x_2, z_1, z_2), \\ g_{\mathrm{LF}}(x_1, x_2, z_1, z_2) &= g_{\mathrm{HF}}(x_1, x_2) - 0.1 \sin(10x_1 + 5x_2), \end{aligned} \end{split}\]

where \(f(\boldsymbol x)\) is the same objective function as the previous example. The goal is to minimize the high-fidelity objective function with respect to the inequality constraint and leverage their ‘cheaper’ approximation to reduce the optimization cost.

\[\begin{split} \begin{aligned} \min_{\boldsymbol x \in \mathbb R^2, \; \boldsymbol z \in \mathbb Z_+^2} \quad & f_\text{HF}(x_1, x_2, z_1, z_2) \\ \text{s.t.} \quad & 0 \leq x_i \leq 1 \quad i = 1, 2 \\ & z_i \in \{0, 1\} \quad i = 1, 2 \\ & g_\text{HF} (x_1, x_2, z_1, z_2) \leq 0. \end{aligned} \end{split}\]

The cell below imports the test function from within SMT Optim test problems collection. The design space is initialized with an SMT’s DesignSpace class. The figure shows the low- (in purple) and high-fidelity (in blue) test problems over their entire design space. The semi-transparent regions denote the unfeasible regions due to the inequality constraint.

# fetches the mixed-variable test problem
problem = get_problem("MultiFidelityMixVarBranin")

# defines the design space using SMT's DesignSpace class
design_space = ds.DesignSpace([
    ds.FloatVariable(lower=0, upper=1),
    ds.FloatVariable(lower=0, upper=1),
    ds.CategoricalVariable([0, 1]),
    ds.CategoricalVariable([0, 1]),
])

# ------- plots the test problem -------
bounds = np.array([
    [0, 1],
    [0, 1],
])

z_combinations = [
    (0, 0),
    (0, 1),
    (1, 0),
    (1, 1),
]

colors = ["C4", "C0"]

fig = plt.figure(figsize=(9, 7))

for i, (z1, z2) in enumerate(z_combinations, start=1):

    ax = fig.add_subplot(2, 2, i, projection="3d")

    for lvl in range(problem.num_fidelity):

        def wrapper(x, func):
            x0 = np.array([x[0], x[1], z1, z2])
            return func(x0)

        XX, YY, Z = get_plot2d_data(lambda x, f=problem.objective[lvl]: wrapper(x, func=f), bounds, 101)
        _, _, G = get_plot2d_data(lambda x, f=problem.constraints[0][lvl]: wrapper(x, func=f), bounds, 101)

        Z_feas = np.where(G > 0, np.nan, Z)

        ax.set_title(r"$(z_1, z_2)$ = (" + str(z1) + ", " + str(z2) + ")", pad=5.0)
        ax.set_xlabel(r"$x_1$")
        ax.set_ylabel(r"$x_2$")
        ax.plot_wireframe(XX, YY, Z_feas, linewidth=0.5, edgecolor=colors[lvl])
        ax.plot_wireframe(XX, YY, Z, alpha=0.2, linewidth=0.5, edgecolor=colors[lvl])

plt.tight_layout(h_pad=2.5)
plt.show()
../_images/37ccc84ca7eeb4da3a57a32f7e0317ccd5e55389d1523488a27fb248c7989bb7.png

Starting the optimization#

The Bayesian optimization is started by calling the minimize method. To define a mixed-variable design space instead of a continuous one, an SMT’s DesignSpace object must be given to the design space argument. Additionally, the objective argument expects a list of method callables; the same applies to any constraints.

# defines the constraint
constraint = [
    {
        "fun": problem.constraints[0],      # list of callables
        "upper": 0.,                        # g(x) <= 0
    }
]

# starts the optimization
state = minimize(
    problem.objective,                     # list of callables
    design_space,                           # ds.DesignSpace object
    constraints=constraint,
    costs=[0.1, 1.],
    max_iter=20,
    max_budget=20,
    # arguments passed to the optimization driver
    driver_kwargs={
        "nt_init": 3,
        "seed": 42
    },
    # arguments passed to the acquisition strategy
    strategy_kwargs={
        # "fidelity_crit": "pessimistic",
        "sp_method": "Cobyla",
        "sp_tol": 1e-4
    }
)

# fetches the best sample
best_sample = state.get_best_sample()
print(best_sample)
          iter         budget           fmin           rscv       fidelity        gp_time       acq_time
             0          3.600    2.70574e+00      0.000e+00            nan            nan            nan
             1          3.700    2.70574e+00      0.000e+00              1          2.022          2.447
             2          4.800    2.70574e+00      0.000e+00              2          1.778          2.416
             3          5.900    2.70574e+00      0.000e+00              2          1.951          2.991
             4          7.000    2.70574e+00      0.000e+00              2          1.769          2.566
             5          7.100    2.70574e+00      0.000e+00              1          1.686          2.053
             6          7.200    2.70574e+00      0.000e+00              1          2.299          2.423
             7          8.300    3.25733e-01      0.000e+00              2          1.465          1.709
             8          9.400    3.25733e-01      0.000e+00              2          1.485          1.979
             9         10.500    3.25733e-01      0.000e+00              2          1.516          3.400
          iter         budget           fmin           rscv       fidelity        gp_time       acq_time
            10         11.600    3.25733e-01      0.000e+00              2          1.478          2.428
            11         12.700    3.25733e-01      0.000e+00              2          1.620          6.718
            12         13.800    3.25733e-01      0.000e+00              2          1.590          2.142
            13         14.900   -7.47823e-01      0.000e+00              2          1.717          3.542
            14         16.000   -7.47823e-01      0.000e+00              2          1.511          3.099
            15         17.100   -7.47823e-01      0.000e+00              2          1.711          5.699
            16         18.200   -7.47823e-01      0.000e+00              2          1.537          4.276
            17         19.300   -7.47823e-01      0.000e+00              2          1.493          4.141
            18         20.400   -7.47823e-01      0.000e+00              2          1.577          4.687
======= sample data =======
x =             [1.         0.43291579 0.         0.        ]
obj =           [-0.74782291]
cstr =          [-0.03291579]
eval_time =     [3.38099926e-06 9.48999514e-07]
------- meta data -------
iter =     13
budget =     14.899999999999997
rscv =     0.0
===========================

Plotting the results#

The code snippet below plots the best sample, displayed with a star, against the test problem.

best_sample = state.get_best_sample()
x_star = best_sample.x
y_star = best_sample.obj[0]
#  categorical variables
z1_star = x_star[2]
z2_star = x_star[3]

# ------- plots the best sample against the functions -------
fig = plt.figure(figsize=(9, 7))

for i, (z1, z2) in enumerate(z_combinations, start=1):

    ax = fig.add_subplot(2, 2, i, projection="3d")

    for lvl in range(problem.num_fidelity):

        def wrapper(x, func):
            x0 = np.array([x[0], x[1], z1, z2])
            return func(x0)

        XX, YY, Z = get_plot2d_data(lambda x, f=problem.objective[lvl]: wrapper(x, func=f), bounds, 101)
        _, _, G = get_plot2d_data(lambda x, f=problem.constraints[0][lvl]: wrapper(x, func=f), bounds, 101)

        Z_feas = np.where(G > 0, np.nan, Z)

        ax.set_title(r"$(z_1, z_2)$ = (" + str(z1) + ", " + str(z2) + ")")
        ax.set_xlabel(r"$x_1$")
        ax.set_ylabel(r"$x_2$")
        ax.plot_wireframe(XX, YY, Z_feas, linewidth=0.5, edgecolor=colors[lvl])
        ax.plot_wireframe(XX, YY, Z, alpha=0.2, linewidth=0.5, edgecolor=colors[lvl])

    if z1 == z1_star and z2 == z2_star:

        x2, y2, _ = proj3d.proj_transform(
            x_star[0], x_star[1], y_star,
            ax.get_proj()
        )

        ax.annotate(
            "★",
            (x2, y2),
            color="C3",
            fontsize=14,
            ha="center",
            zorder=20,
        )

plt.tight_layout(h_pad=2.5)
plt.show()
../_images/4b343a19f9df82eaa7c4b8bfdee9b6fcd9e1d56f5800f3a8bc4c0afb8890a41d.png