Source code for netket.experimental.driver.vmc_srt

# Copyright 2023  The NetKet Authors - All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Optional

from functools import partial
from textwrap import dedent
import warnings

import jax
import jax.numpy as jnp
import jax.scipy as jsp

from netket import jax as nkjax
from netket import stats as nkstats
from netket.driver import AbstractVariationalDriver
from netket.errors import UnoptimalSRtWarning
from netket.jax import sharding
from netket.operator import AbstractOperator
from netket.utils import mpi
from netket.utils.types import ScalarOrSchedule, Optimizer, PyTree
from netket.vqs import MCState

from jax.flatten_util import ravel_pytree


@partial(jax.jit, static_argnames=("mode", "solver_fn"))
def SRt(
    O_L, local_energies, diag_shift, *, mode, solver_fn, e_mean=None, params_structure
):
    """
    For more details, see `https://arxiv.org/abs/2310.05715'. In particular,
    the following parallel implementation is described in Appendix "Distributed SR computation".
    """
    N_params = O_L.shape[-1]
    N_mc = O_L.shape[0] * mpi.n_nodes

    local_energies = local_energies.flatten()

    if e_mean is None:
        e_mean = mpi.mean(local_energies)
    de = jnp.conj(local_energies - e_mean).squeeze()

    # * in this case O_L should be padded with zeros
    assert (N_params % mpi.n_nodes) == 0

    O_L = O_L / N_mc**0.5
    dv = -2.0 * de / N_mc**0.5

    if mode == "complex":
        # Concatenate the real and imaginary derivatives of the ansatz
        # O_L = jnp.concatenate((O_L[:, 0], O_L[:, 1]), axis=0)
        O_L = jnp.transpose(O_L, (1, 0, 2)).reshape(-1, O_L.shape[-1])

        dv = jnp.concatenate((jnp.real(dv), -jnp.imag(dv)), axis=-1)
    elif mode == "real":
        dv = dv.real
    else:
        raise NotImplementedError()

    # twons, (np, n_nodes) -> twons, np, n_nodes
    O_LT = O_L.reshape(O_L.shape[0], -1, mpi.n_nodes)
    # twons, np, n_nodes -> n_nodes, twons, np
    O_LT = jnp.moveaxis(O_LT, -1, 0)

    dv, token = mpi.mpi_gather_jax(dv)
    dv = dv.reshape(-1, *dv.shape[2:])
    O_LT, token = mpi.mpi_alltoall_jax(O_LT, token=token)

    # proc, twons, np -> (proc, twons) np
    O_LT = O_LT.reshape(-1, O_LT.shape[-1])

    matrix, token = mpi.mpi_reduce_sum_jax(O_LT @ O_LT.T, token=token)
    matrix_side = matrix.shape[-1]  # * it can be Ns or 2*Ns, depending on mode

    if mpi.rank == 0:
        matrix = matrix + diag_shift * jnp.eye(
            matrix_side
        )  # * shift diagonal regularization
        aus_vector = solver_fn(matrix, dv)
        aus_vector = aus_vector.reshape(mpi.n_nodes, -1)
        aus_vector, token = mpi.mpi_scatter_jax(aus_vector, token=token)
    else:
        shape = jnp.zeros((int(matrix_side / mpi.n_nodes),), dtype=jnp.float64)
        aus_vector, token = mpi.mpi_scatter_jax(shape, token=token)

    updates = O_L.T @ aus_vector
    updates, token = mpi.mpi_allreduce_sum_jax(updates, token=token)

    # If complex mode and we have complex parameters, we need
    # To repack the real coefficients in order to get complex updates
    if mode == "complex" and nkjax.tree_leaf_iscomplex(params_structure):
        np = updates.shape[-1] // 2
        updates = (updates[:np] + 1j * updates[np:]) / 2

    return -updates


inv_default_solver = lambda A, b: jnp.linalg.inv(A) @ b
linear_solver = lambda A, b: jsp.linalg.solve(A, b, assume_a="pos")


@jax.jit
def _flatten_samples(x):
    return x.reshape(-1, x.shape[-1])


[docs] class VMC_SRt(AbstractVariationalDriver): r""" Energy minimization using Variational Monte Carlo (VMC) and the kernel formulation of Stochastic Reconfiguration (SR). This approach lead to *exactly* the same parameter updates of the standard SR with a diagonal shift regularization. For this reason, it is equivalent to the standard nk.driver.VMC with the preconditioner nk.optimizer.SR(solver=netket.optimizer.solver.solvers.solve)). In the kernel SR framework, the updates of the parameters can be written as: .. math:: \delta \theta = \tau X(X^TX + \lambda \mathbb{I}_{2M})^{-1} f, where :math:`X \in R^{P \times 2M}` is the concatenation of the real and imaginary part of the centered Jacobian, with P the number of parameters and M the number of samples. The vector f is the concatenation of the real and imaginary part of the centered local energy. Note that, to compute the updates, it is sufficient to invert an :math:`M\times M` matrix instead of a :math:`P\times P` one. As a consequence, this formulation is useful in the typical deep learning regime where :math:`P \gg M`. See `R.Rende, L.L.Viteritti, L.Bardone, F.Becca and S.Goldt <https://arxiv.org/abs/2310.05715>`_ for a detailed description of the derivation. A similar result can be obtained by minimizing the Fubini-Study distance with a specific constrain, see `A.Chen and M.Heyl <https://arxiv.org/abs/2302.01941>`_ for details. """
[docs] def __init__( self, hamiltonian: AbstractOperator, optimizer: Optimizer, *, diag_shift: ScalarOrSchedule, linear_solver_fn: Callable[[jax.Array, jax.Array], jax.Array] = linear_solver, jacobian_mode: Optional[str] = None, variational_state: MCState = None, ): """ Initializes the driver class. Args: hamiltonian: The Hamiltonian of the system. optimizer: Determines how optimization steps are performed given the bare energy gradient. diag_shift: The diagonal shift of the stochastic reconfiguration matrix. Typical values are 1e-4 ÷ 1e-3. Can also be an optax schedule. hamiltonian: The Hamiltonian of the system. linear_solver_fn: Callable to solve the linear problem associated to the updates of the parameters jacobian_mode: The mode used to compute the jacobian of the variational state. Can be `'real'` or `'complex'` (defaults to the dtype of the output of the model). variational_state: The :class:`netket.vqs.MCState` to be optimised. Other variational states are not supported. """ super().__init__(variational_state, optimizer, minimized_quantity_name="Energy") if variational_state.hilbert != hamiltonian.hilbert: raise TypeError( dedent( f"""the variational_state has hilbert space {variational_state.hilbert} (this is normally defined by the hilbert space in the sampler), but the hamiltonian has hilbert space {hamiltonian.hilbert}. The two should match. """ ) ) if self.state.n_parameters % sharding.device_count() != 0: raise NotImplementedError( f""" VMC_SRt requires a network with a number of parameters multiple of the number of MPI devices/ranks in use. You have a network with {self.state.n_parameters}, but there are {sharding.device_count()} MPI ranks in use. To fix this, either add some 'fake' parameters to your network, or change the number of MPI nodes, or contribute some padding logic to NetKet! """ ) if self.state.n_parameters < self.state.n_samples: warnings.warn( UnoptimalSRtWarning(self.state.n_parameters, self.state.n_samples), UserWarning, stacklevel=2, ) self._ham = hamiltonian.collect() # type: AbstractOperator self._dp: PyTree = None self.diag_shift = diag_shift self.jacobian_mode = jacobian_mode self._linear_solver_fn = linear_solver_fn self._params_structure = jax.tree_map( lambda x: jax.ShapeDtypeStruct(x.shape, x.dtype), self.state.parameters ) if not nkjax.tree_ishomogeneous(self._params_structure): raise ValueError( "SRt only supports neural networks with all real or all complex " "parameters. Hybrid structures are not yet supported (but we would welcome " "contributions. Get in touch with us!)" ) _, unravel_params_fn = ravel_pytree(self.state.parameters) self._unravel_params_fn = jax.jit(unravel_params_fn)
@property def jacobian_mode(self) -> str: """ The mode used to compute the jacobian of the variational state. Can be `'real'` or `'complex'`. Real mode truncates imaginary part of the wavefunction, while `complex` does not. This internally uses :func:`netket.jax.jacobian`. See that function for a more complete documentation. """ return self._jacobian_mode @jacobian_mode.setter def jacobian_mode(self, mode: Optional[str]): if mode is None: mode = nkjax.jacobian_default_mode( self.state._apply_fun, self.state.parameters, self.state.model_state, self.state.samples, warn=False, ) if mode not in ["complex", "real"]: raise ValueError( "`jacobian_mode` only supports 'real' for real-valued wavefunctions and" "'complex'.\n\n" "`holomorphic` is not yet supported, but could be contributed in the future." ) self._jacobian_mode = mode def _forward_and_backward(self): """ Performs a number of VMC optimization steps. Args: n_steps (int): Number of steps to perform. """ self.state.reset() # Compute the local energy estimator and average Energy local_energies = self.state.local_estimators(self._ham) self._loss_stats = nkstats.statistics(local_energies) samples = _flatten_samples(self.state.samples) jacobians = nkjax.jacobian( self.state._apply_fun, self.state.parameters, samples, self.state.model_state, mode=self.jacobian_mode, dense=True, center=True, ) # jacobians is centered diag_shift = self.diag_shift if callable(self.diag_shift): diag_shift = diag_shift(self.step_count) updates = SRt( jacobians, local_energies, diag_shift, mode=self.jacobian_mode, solver_fn=self._linear_solver_fn, e_mean=self._loss_stats.Mean, params_structure=self._params_structure, ) self._dp = self._unravel_params_fn(updates) return self._dp @property def energy(self) -> nkstats.Stats: """ Return MCMC statistics for the expectation value of observables in the current state of the driver. """ return self._loss_stats def __repr__(self): return ( "Vmc_SRt(" + f"\n step_count = {self.step_count}," + f"\n state = {self.state})" )