cleanup

BatyLeo · BatyLeo · commit de4575ccf503 · 2025-04-09T15:27:03.000+02:00
diff --git a/src/InferOpt.jl b/src/InferOpt.jl
@@ -62,6 +62,8 @@ include("losses/ssvm_loss.jl")
 include("losses/zero_one_loss.jl")
 include("losses/imitation_loss.jl")
 
+export compute_probability_distribution
+
 export half_square_norm
 export shannon_entropy, negative_shannon_entropy
 export one_hot_argmax, ranking
diff --git a/src/layers/perturbed/perturbation.jl b/src/layers/perturbed/perturbation.jl
@@ -4,7 +4,8 @@ $TYPEDEF
 Abstract type for a perturbation.
 It's a function that takes a parameter `θ` and returns a perturbed parameter by a distribution `perturbation_dist`.
 
-All subtypes should have a `perturbation_dist`
+!!! warning
+    All subtypes should have a `perturbation_dist` field, which is a `ContinuousUnivariateDistribution`.
 
 # Existing implementations
 - [`AdditivePerturbation`](@ref)
@@ -44,19 +45,37 @@ function (pdc::AdditivePerturbation)(θ::AbstractArray)
     return product_distribution(θ .+ ε * perturbation_dist)
 end
 
+"""
+$TYPEDEF
+
+Callable struct that computes the gradient of the logdensity of η = θ + εZ w.r.t. θ, with Z ∼ N(0, 1).
+
+# Fields
+$TYPEDFIELDS
+"""
+struct NormalAdditiveGradLogdensity
+    "perturbation size"
+    ε::Float64  # scale of the additive noise εZ
+end
+
+function NormalAdditiveGradLogdensity(pdc::AdditivePerturbation)
+    return NormalAdditiveGradLogdensity(pdc.ε)  # reuse the perturbation size of `pdc`
+end
+
 """
 $TYPEDSIGNATURES
 
 Compute the gradient of the logdensity of η = θ + εZ w.r.t. θ., with Z ∼ N(0, 1).
 """
-function normal_additive_grad_logdensity(ε, η, θ)
+function (f::NormalAdditiveGradLogdensity)(η::AbstractArray, θ::AbstractArray)
+    (; ε) = f  # perturbation size stored in the functor
     return ((η .- θ) ./ ε^2,)
 end
 
 """
 $TYPEDEF
 
-Multiplicative perturbation: θ ↦ θ ⊙ exp(εZ - ε²/2)
+Multiplicative perturbation: θ ↦ θ ⊙ exp(εZ - shift)
 
 # Fields
 $TYPEDFIELDS
@@ -66,6 +85,17 @@ struct MultiplicativePerturbation{F}
     perturbation_dist::F
     "perturbation size"
     ε::Float64
+    "shift in the exponent; default ε²/2 makes exp(εZ - shift) have mean 1 for Z ∼ N(0, 1)"
+    shift::Float64
+end
+
+"""
+$TYPEDSIGNATURES
+
+Constructor for [`MultiplicativePerturbation`](@ref): `ε` and `shift` are converted to `Float64`, and `shift` defaults to `ε^2 / 2`.
+"""
+function MultiplicativePerturbation(perturbation_dist, ε, shift=ε^2 / 2)
+    return MultiplicativePerturbation(perturbation_dist, Float64(ε), Float64(shift))
 end
 
 """
@@ -74,16 +104,42 @@ $TYPEDSIGNATURES
 Apply the multiplicative perturbation to the parameter `θ`.
 """
 function (pdc::MultiplicativePerturbation)(θ::AbstractArray)
-    (; perturbation_dist, ε) = pdc
-    return product_distribution(θ .* ExponentialOf(ε * perturbation_dist - ε^2 / 2))
+    (; perturbation_dist, ε, shift) = pdc  # base distribution, scale, exponent shift
+    return product_distribution(θ .* ExponentialOf(ε * perturbation_dist - shift))
 end
+
+"""
+$TYPEDEF
+
+Callable struct that computes the gradient of the logdensity of η = θ ⊙ exp(εZ - shift) w.r.t. θ, with Z ∼ N(0, 1).
+
+# Fields
+$TYPEDFIELDS
+"""
+struct NormalMultiplicativeGradLogdensity
+    "perturbation size"
+    ε::Float64  # scale of the Gaussian noise εZ inside the exponential
+    "optional shift to have 0 mean"
+    shift::Float64  # subtracted in the exponent: η = θ ⊙ exp(εZ - shift)
+end
+
+function NormalMultiplicativeGradLogdensity(pdc::MultiplicativePerturbation)
+    return NormalMultiplicativeGradLogdensity(pdc.ε, pdc.shift)
+end
+
+function NormalMultiplicativeGradLogdensity(ε::Real, shift=ε^2 / 2)
+    return NormalMultiplicativeGradLogdensity(Float64(ε), Float64(shift))  # avoid recursion
+end
+
 """
 $TYPEDSIGNATURES
 
-Compute the gradient of the logdensity of η = θ ⊙ exp(εZ - ε²/2) w.r.t. θ., with Z ∼ N(0, 1).
+Compute the gradient of the logdensity of η = θ ⊙ exp(εZ - shift) w.r.t. θ, with Z ∼ N(0, 1).
+
 !!! warning
     η should be a realization of θ, i.e. should be of the same sign.
 """
-function normal_multiplicative_grad_logdensity(ε, η, θ)
-    return (inv.(ε^2 .* θ) .* (log.(abs.(η)) - log.(abs.(θ)) .+ (ε^2 / 2)),)
+function (f::NormalMultiplicativeGradLogdensity)(η::AbstractArray, θ::AbstractArray)
+    (; ε, shift) = f  # scale and exponent shift stored in the functor
+    return (inv.(ε^2 .* θ) .* (log.(abs.(η)) - log.(abs.(θ)) .+ shift),)
 end
diff --git a/src/layers/perturbed/perturbed.jl b/src/layers/perturbed/perturbed.jl
@@ -93,7 +93,7 @@ function PerturbedAdditive(
     threaded=false,
     rng=Random.default_rng(),
     dist_logdensity_grad=if (perturbation_dist == Normal(0, 1))
-        FixFirst(normal_additive_grad_logdensity, ε)
+        NormalAdditiveGradLogdensity(ε)
     else
         nothing
     end,
@@ -126,7 +126,7 @@ function PerturbedMultiplicative(
     threaded=false,
     rng=Random.default_rng(),
     dist_logdensity_grad=if (perturbation_dist == Normal(0, 1))
-        FixFirst(normal_multiplicative_grad_logdensity, ε)
+        NormalMultiplicativeGradLogdensity(float(ε))
     else
         nothing
     end,
diff --git a/src/layers/perturbed/utils.jl b/src/layers/perturbed/utils.jl
@@ -2,6 +2,8 @@
 $TYPEDSIGNATURES
 
 Data structure modeling the exponential of a continuous univariate random variable.
+
+`Random.rand` and `Distributions.logpdf` are defined for the [`ExponentialOf`](@ref) distribution.
 """
 struct ExponentialOf{D<:ContinuousUnivariateDistribution} <:
        ContinuousUnivariateDistribution
@@ -19,7 +21,7 @@ end
 $TYPEDSIGNATURES
 
 Return the log-density of the [`ExponentialOf`](@ref) distribution at `x`.
-It is equal to ``logpdf(d, log(x)) - log(x)``
+It is equal to ``logpdf(d, log(x)) - log(x)``.
 """
 function Distributions.logpdf(d::ExponentialOf, x::Real)
     return logpdf(d.dist, log(x)) - log(x)
diff --git a/src/layers/regularized/abstract_regularized.jl b/src/layers/regularized/abstract_regularized.jl
@@ -10,7 +10,7 @@ with g and h functions of y.
 # Interface
 - `(regularized::AbstractRegularized)(θ; kwargs...)`: return `ŷ(θ)`
 - `compute_regularization(regularized, y)`: return `Ω(y)
-- `get_maximizer(regularized)`: return the associated `GeneralizedMaximizer` optimizer
+- `get_maximizer(regularized)`: return the associated optimizer
 
 # Available implementations
 - [`SoftArgmax`](@ref)
diff --git a/src/losses/fenchel_young_loss.jl b/src/losses/fenchel_young_loss.jl
@@ -10,6 +10,12 @@ Reference: <https://arxiv.org/abs/1901.02324>
 
 # Fields
 - `optimization_layer::AbstractOptimizationLayer`: optimization layer that can be formulated as `ŷ(θ) = argmax {θᵀy - Ω(y)}` (either regularized or perturbed)
+
+# Compatibility
+This loss is compatible with:
+- [`LinearMaximizer`](@ref)-based layers.
+- [`PerturbedOracle`](@ref) layers, with additive or multiplicative perturbations (generic perturbations are not supported).
+- any [`AbstractRegularized`](@ref) layer.
 """
 struct FenchelYoungLoss{O<:AbstractOptimizationLayer} <: AbstractLossLayer
     optimization_layer::O
diff --git a/src/utils/linear_maximizer.jl b/src/utils/linear_maximizer.jl
@@ -43,8 +43,8 @@ function (f::LinearMaximizer)(θ::AbstractArray; kwargs...)
 end
 
 # default is oracles of the form argmax_y θᵀy
-objective_value(::Any, θ, y; kwargs...) = dot(θ, y)
-apply_g(::Any, y; kwargs...) = y
+@inline objective_value(::Any, θ, y; kwargs...) = dot(θ, y)  # fallback: linear θᵀy
+@inline apply_g(::Any, y; kwargs...) = y  # fallback: g is the identity
 
 """
 $TYPEDSIGNATURES