Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use DifferentiationInterface for autodiff, allow ADTypes #153

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
name = "NLSolversBase"
uuid = "d41bc354-129a-5804-8e4c-c37616107c6c"
version = "7.8.3"
version = "7.9.0"

[deps]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
DiffResults = "163ba53b-c6d8-5494-b064-1a9d43ac40c5"
DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"

[compat]
ADTypes = "1.11.0"
DiffResults = "1.0"
ForwardDiff = "0.10"
DifferentiationInterface = "0.6.24"
FiniteDiff = "2.0"
julia = "1.5"
ForwardDiff = "0.10"
julia = "1.10"

[extras]
ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OptimTestProblems = "cec144fc-5a64-5bc6-99fb-dde8f63e154c"
Expand All @@ -24,4 +29,4 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["ComponentArrays", "LinearAlgebra", "OptimTestProblems", "Random", "RecursiveArrayTools", "SparseArrays", "Test"]
test = ["ADTypes", "ComponentArrays", "LinearAlgebra", "OptimTestProblems", "Random", "RecursiveArrayTools", "SparseArrays", "Test"]
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ There are currently three main types: `NonDifferentiable`, `OnceDifferentiable`,

The words in front of `Differentiable` in the type names (`Non`, `Once`, `Twice`) are not meant to indicate a specific classification of the function as such (a `OnceDifferentiable` might be constructed for an infinitely differentiable function), but signals to an algorithm if the correct functions have been constructed or if automatic differentiation should be used to further differentiate the function.

## Automatic differentiation

Some constructors for `OnceDifferentiable`, `TwiceDifferentiable`, `OnceDifferentiableConstraints` and `TwiceDifferentiableConstraints` accept a positional argument called `autodiff`.
This argument can be either:

- An object subtyping `AbstractADType`, defined by [ADTypes.jl](https://github.com/SciML/ADTypes.jl) and supported by [DifferentiationInterface.jl](https://github.com/JuliaDiff/DifferentiationInterface.jl).
- A `Symbol` like `:finite` (and variants thereof) or `:forward`, which fall back on `ADTypes.AutoFiniteDiff` and `ADTypes.AutoForwardDiff` respectively.
- A `Bool`, namely `true`, which falls back on `ADTypes.AutoForwardDiff`.

When the positional argument `chunk` is passed, it is used to configure the chunk size in `ADTypes.AutoForwardDiff`, but _only_ if `autodiff` is one of `:forward`, `:forwarddiff` or `true`.
Indeed, if `autodiff isa ADTypes.AutoForwardDiff`, we assume that the user already selected the appropriate chunk size and so `chunk` is ignored.

## Examples
#### Optimization
Say we want to minimize the Hosaki test function
Expand Down
17 changes: 17 additions & 0 deletions src/NLSolversBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ __precompile__(true)

module NLSolversBase

using ADTypes: AbstractADType, AutoForwardDiff, AutoFiniteDiff
import DifferentiationInterface as DI
using FiniteDiff, ForwardDiff, DiffResults
import Distributed: clear!
export AbstractObjective,
Expand Down Expand Up @@ -54,9 +56,24 @@ function finitediff_fdtype(autodiff)
fdtype
end

# Translate an optional `ForwardDiff.Chunk` into the integer `chunksize`
# keyword passed to `ADTypes.AutoForwardDiff`; `nothing` passes through
# unchanged (no explicit chunk size requested).
function forwarddiff_chunksize(::Nothing)
    return nothing
end
function forwarddiff_chunksize(::ForwardDiff.Chunk{C}) where {C}
    return C
end

# True when `autodiff` names one of the FiniteDiff-based modes.
is_finitediff(autodiff) = autodiff in (:central, :finite, :finiteforward, :finitecomplex)
# True when `autodiff` requests ForwardDiff (the legacy value `true` also
# selects ForwardDiff).
is_forwarddiff(autodiff) = autodiff in (:forward, :forwarddiff, true)

"""
    get_adtype(autodiff, chunk=nothing)

Translate the user-facing `autodiff` argument into an `ADTypes.AbstractADType`
backend for DifferentiationInterface.

- An `AbstractADType` is returned unchanged; `chunk` is ignored because the
  user is assumed to have configured the backend already.
- A finite-difference `Symbol` (accepted by `is_finitediff`) maps to
  `AutoFiniteDiff` with the corresponding `fdtype`.
- A ForwardDiff selector (accepted by `is_forwarddiff`, including `true`)
  maps to `AutoForwardDiff`, using `chunk` for the chunk size when provided.

Throws an error for any other value.
"""
get_adtype(autodiff::AbstractADType, chunk=nothing) = autodiff

function get_adtype(autodiff::Union{Symbol,Bool}, chunk=nothing)
    if is_finitediff(autodiff)
        return AutoFiniteDiff(; fdtype=finitediff_fdtype(autodiff)())
    elseif is_forwarddiff(autodiff)
        return AutoForwardDiff(; chunksize=forwarddiff_chunksize(chunk))
    else
        # List all accepted values, not just :finite/:forward, so users are not
        # misled into thinking e.g. :central is unsupported.
        error("The autodiff value $autodiff is not supported. Use an ADTypes.AbstractADType, one of the Symbols :central, :finite, :finiteforward, :finitecomplex, :forward, :forwarddiff, or the value true.")
    end
end

# Return an array shaped like `x` with element type `Tf`, every entry NaN.
function x_of_nans(x, Tf=eltype(x))
    nan_array = Tf.(x)  # broadcasting converts/copies to the requested eltype
    return fill!(nan_array, Tf(NaN))
end

include("objective_types/inplace_factory.jl")
Expand Down
190 changes: 39 additions & 151 deletions src/objective_types/constraints.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,27 +139,13 @@ function OnceDifferentiableConstraints(c!, lx::AbstractVector, ux::AbstractVecto
xcache = zeros(T, sizex)
ccache = zeros(T, sizec)

if is_finitediff(autodiff)
ccache2 = similar(ccache)
fdtype = finitediff_fdtype(autodiff)
jacobian_cache = FiniteDiff.JacobianCache(xcache, ccache,ccache2,fdtype)
function jfinite!(J, x)
FiniteDiff.finite_difference_jacobian!(J, c!, x, jacobian_cache)
J
end
return OnceDifferentiableConstraints(c!, jfinite!, bounds)
elseif is_forwarddiff(autodiff)
jac_cfg = ForwardDiff.JacobianConfig(c!, ccache, xcache, chunk)
ForwardDiff.checktag(jac_cfg, c!, xcache)

function jforward!(J, x)
ForwardDiff.jacobian!(J, c!, ccache, x, jac_cfg, Val{false}())
J
end
return OnceDifferentiableConstraints(c!, jforward!, bounds)
else
error("The autodiff value $autodiff is not support. Use :finite or :forward.")
backend = get_adtype(autodiff, chunk)
jac_prep = DI.prepare_jacobian(c!, ccache, backend, xcache)
function j!(_j, _x)
DI.jacobian!(c!, ccache, _j, jac_prep, backend, _x)
Comment on lines +144 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe these could be made callable structs to avoid closing over c!, ccache, jac_prep and backend?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These were already closures in the existing code, I thought I'd go for the "minimum diff" changes and then we could improve later on

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I saw that. It seemed you're closing over more variables in the DI version though, so it might be even more valuable to change the design.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I seem to recall that closures are not an issue if the variable we close over does not get assigned to more than once? So we might be good here?
In any case, to hunt down this kind of type inference barriers we would also need to improve annotation of functions in the various structs of the package.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know. Even if it's currently the case, personally I wouldn't rely on it since in my experience the exact behaviour of the compiler is inherently unstable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return _j
end
return OnceDifferentiableConstraints(c!, j!, bounds)
end


Expand All @@ -179,153 +165,55 @@ function TwiceDifferentiableConstraints(c!, lx::AbstractVector, ux::AbstractVect
lc::AbstractVector, uc::AbstractVector,
autodiff::Symbol = :central,
chunk::ForwardDiff.Chunk = checked_chunk(lx))
if is_finitediff(autodiff)
fdtype = finitediff_fdtype(autodiff)
return twicediff_constraints_finite(c!,lx,ux,lc,uc,fdtype,nothing)
elseif is_forwarddiff(autodiff)
return twicediff_constraints_forward(c!,lx,ux,lc,uc,chunk,nothing)
else
error("The autodiff value $autodiff is not support. Use :finite or :forward.")
end
end

function TwiceDifferentiableConstraints(c!, con_jac!,lx::AbstractVector, ux::AbstractVector,
lc::AbstractVector, uc::AbstractVector,
autodiff::Symbol = :central,
chunk::ForwardDiff.Chunk = checked_chunk(lx))
if is_finitediff(autodiff)
fdtype = finitediff_fdtype(autodiff)
return twicediff_constraints_finite(c!,lx,ux,lc,uc,fdtype,con_jac!)
elseif is_forwarddiff(autodiff)
return twicediff_constraints_forward(c!,lx,ux,lc,uc,chunk,con_jac!)
else
error("The autodiff value $autodiff is not support. Use :finite or :forward.")
end
end



function TwiceDifferentiableConstraints(lx::AbstractArray, ux::AbstractArray)
bounds = ConstraintBounds(lx, ux, [], [])
TwiceDifferentiableConstraints(bounds)
end


function twicediff_constraints_forward(c!, lx, ux, lc, uc,chunk,con_jac! = nothing)
bounds = ConstraintBounds(lx, ux, lc, uc)
T = eltype(bounds)
nc = length(lc)
nx = length(lx)
x_example = zeros(T, nx)
λ_example = zeros(T, nc)
ccache = zeros(T, nc)
xcache = zeros(T, nx)
cache_check = Ref{DataType}(Missing) #the datatype Missing, not the singleton
ref_f= Ref{Any}() #cache for intermediate jacobian used in the hessian
cxxcache = zeros(T, nx * nc, nx) #output cache for hessian
h = reshape(cxxcache, (nc, nx, nx)) #reshaped output
hi = [@view h[i,:,:] for i in 1:nc]
#ref_f caches the closure function with its caches. other aproaches include using a Dict, but the
#cost of switching happens just once per optimize call.

if isnothing(con_jac!) #if the jacobian is not provided, generate one
jac_cfg = ForwardDiff.JacobianConfig(c!, ccache, xcache, chunk)
ForwardDiff.checktag(jac_cfg, c!, xcache)

jac! = (J, x) -> begin
ForwardDiff.jacobian!(J, c!, ccache, x, jac_cfg, Val{false}())
J
end

# λ-weighted sum of the constraint values at `_x`: Σᵢ λᵢ cᵢ(x).
# Differentiated (via DI.hessian!) to form the constraint Hessian.
function sum_constraints(_x, _λ)
    # TODO: get rid of this allocation with DI.Cache
    # The buffer must have an eltype wide enough for AD dual numbers in `_x`.
    ccache_righttype = zeros(promote_type(T, eltype(_x)), nc)
    c!(ccache_righttype, _x)
    # BUGFIX: sum the freshly computed values, NOT the Float64 closure cache
    # `ccache` — summing `ccache` discarded the values just written above and
    # severed the dependence on `_x`, making the AD Hessian wrong.
    return sum(_λ[i] * ccache_righttype[i] for i in eachindex(_λ, ccache_righttype))
end

con_jac_cached = x -> begin
exists_cache = (cache_check[] == eltype(x))
if exists_cache
f = ref_f[]
return f(x)
else
jcache = zeros(eltype(x), nc)
out_cache = zeros(eltype(x), nc, nx)
cfg_cache = ForwardDiff.JacobianConfig(c!,jcache,x)
f = z->ForwardDiff.jacobian!(out_cache, c!, jcache, z,cfg_cache,Val{false}())
ref_f[] = f
cache_check[]= eltype(x)
return f(x)
end
end
backend = get_adtype(autodiff, chunk)

else
jac! = (J,x) -> con_jac!(J,x)

#here, the cache should also include a JacobianConfig
con_jac_cached = x -> begin
exists_cache = (cache_check[] == eltype(x))
if exists_cache
f = ref_f[]
return f(x)
else
out_cache = zeros(eltype(x), nc, nx)
f = z->jac!(out_cache,x)
ref_f[] = f
cache_check[]= eltype(x)
return f(x)
end
end

jac_prep = DI.prepare_jacobian(c!, ccache, backend, x_example)
# In-place constraint Jacobian: fills `Jout` with the Jacobian of `c!` at `x`
# using the prepared DifferentiationInterface backend, and returns `Jout`.
function con_jac!(Jout, x)
    DI.jacobian!(c!, ccache, Jout, jac_prep, backend, x)
    return Jout
end

hess_config_cache = ForwardDiff.JacobianConfig(typeof(con_jac_cached),lx)
function con_hess!(hess, x, λ)
ForwardDiff.jacobian!(cxxcache, con_jac_cached, x,hess_config_cache,Val{false}())
for i = 1:nc #hot hessian loop
hess+=λ[i].*hi[i]
end
return hess

hess_prep = DI.prepare_hessian(sum_constraints, backend, x_example, DI.Constant(λ_example))
# In-place Hessian of the λ-weighted constraint sum: writes the Hessian of
# `sum_constraints(x, λ)` with respect to `x` into `Hout` and returns it.
# `λ` enters as a DI.Constant, so it is not differentiated.
function con_hess!(Hout, x, lambda)
    DI.hessian!(sum_constraints, Hout, hess_prep, backend, x, DI.Constant(lambda))
    return Hout
end

return TwiceDifferentiableConstraints(c!, jac!, con_hess!, bounds)
return TwiceDifferentiableConstraints(c!, con_jac!, con_hess!, bounds)
end


function twicediff_constraints_finite(c!,lx,ux,lc,uc,fdtype,con_jac! = nothing)
bounds = ConstraintBounds(lx, ux, lc, uc)
T = eltype(bounds)
nx = length(lx)
nc = length(lc)
xcache = zeros(T, nx)
ccache = zeros(T, nc)
function TwiceDifferentiableConstraints(c!, con_jac!,lx::AbstractVector, ux::AbstractVector,
lc::AbstractVector, uc::AbstractVector,
autodiff::Symbol = :central,
chunk::ForwardDiff.Chunk = checked_chunk(lx))
# TODO: is con_jac! still useful? we ignore it here
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What should we do about this? The new version of the code directly computes the Hessian of the sum of constraints


if isnothing(con_jac!)
jac_ccache = similar(ccache)
jacobian_cache = FiniteDiff.JacobianCache(xcache, ccache,jac_ccache,fdtype)
function jac!(J, x)
FiniteDiff.finite_difference_jacobian!(J, c!, x, jacobian_cache)
J
end
else
jac! = (J,x) -> con_jac!(J,x)
end
cxxcache = zeros(T,nc*nx,nx) # to create cached jacobian
h = reshape(cxxcache, (nc, nx, nx)) #reshaped output
hi = [@view h[i,:,:] for i in 1:nc]

function jac_vec!(J,x) #to evaluate the jacobian of a jacobian, FiniteDiff needs a vector version of that
j_mat = reshape(J,nc,nx)
return jac!(j_mat,x)
return J
end
hess_xcache =similar(xcache)
hess_cxcache =zeros(T,nc*nx) #output of jacobian, as a vector
hess_cxxcache =similar(hess_cxcache)
hess_config_cache = FiniteDiff.JacobianCache(hess_xcache,hess_cxcache,hess_cxxcache,fdtype)
function con_hess!(hess, x, λ)
FiniteDiff.finite_difference_jacobian!(cxxcache, jac_vec!, x,hess_config_cache)
for i = 1:nc
hi = @view h[i,:,:]
hess+=λ[i].*hi
end
return hess
end
return TwiceDifferentiableConstraints(c!, jac!, con_hess!, bounds)
return TwiceDifferentiableConstraints(c!, lx, ux, lc, uc, autodiff, chunk)
end



function TwiceDifferentiableConstraints(lx::AbstractArray, ux::AbstractArray)
bounds = ConstraintBounds(lx, ux, [], [])
TwiceDifferentiableConstraints(bounds)
end

function TwiceDifferentiableConstraints(bounds::ConstraintBounds)
c! = (x, c)->nothing
J! = (x, J)->nothing
Expand Down
Loading
Loading