
Commit c74f50d

Add support for batches (#46)
1 parent 312c060 commit c74f50d

24 files changed: +295 -104 lines changed

benchmark/benchmarks.jl

Lines changed: 2 additions & 2 deletions
@@ -55,9 +55,9 @@ aₖ = randn(Float32, insize)
 layers = Dict(
     "MaxPool" => (MaxPool((3, 3); pad=0), aₖ),
     "Conv" => (Conv((3, 3), 3 => 2), aₖ),
-    "Dense" => (Dense(in_dense, out_dense, relu), randn(Float32, in_dense)),
+    "Dense" => (Dense(in_dense, out_dense, relu), randn(Float32, in_dense, 1)),
     "WrappedDense" =>
-        (TestWrapper(Dense(in_dense, out_dense, relu)), randn(Float32, in_dense)),
+        (TestWrapper(Dense(in_dense, out_dense, relu)), randn(Float32, in_dense, 1)),
 )
 rules = Dict(
     "ZeroRule" => ZeroRule(),

src/ExplainableAI.jl

Lines changed: 1 addition & 2 deletions
@@ -1,7 +1,6 @@
 module ExplainableAI
 
 using Base.Iterators
-using LinearAlgebra
 using Flux
 using Zygote
 using Tullio
@@ -14,10 +13,10 @@ using ColorSchemes
 using Markdown
 using PrettyTables
 
+include("neuron_selection.jl")
 include("analyze_api.jl")
 include("flux.jl")
 include("utils.jl")
-include("neuron_selection.jl")
 include("gradient.jl")
 include("lrp_checks.jl")
 include("lrp_rules.jl")

src/analyze_api.jl

Lines changed: 41 additions & 8 deletions
@@ -1,6 +1,11 @@
 abstract type AbstractXAIMethod end
-# All analyzers are implemented such that they return an explanation and the model output:
-# (method::AbstractXAIMethod)(input, ns::AbstractNeuronSelector) -> (expl, output)
+# All analyzers are implemented such that they return an array of explanations:
+# (method::AbstractXAIMethod)(input, ns::AbstractNeuronSelector)::Vector{Explanation}
+
+const BATCHDIM_MISSING = ArgumentError(
+    """The input is a 1D vector and therefore missing the required batch dimension.
+    Call analyze with the keyword argument add_batch_dim=false."""
+)
 
 """
     analyze(input, method)
@@ -9,29 +14,57 @@ abstract type AbstractXAIMethod end
 Return raw classifier output and explanation.
 If `neuron_selection` is specified, the explanation will be calculated for that neuron.
 Otherwise, the output neuron with the highest activation is automatically chosen.
+
+## Keyword arguments
+- `add_batch_dim`: add batch dimension to the input without allocating. Default is `false`.
 """
 function analyze(
     input::AbstractArray{<:Real},
     method::AbstractXAIMethod,
-    neuron_selection::Integer;
+    neuron_selection::Union{Integer,Tuple{<:Integer}};
     kwargs...,
 )
-    return method(input, IndexNS(neuron_selection); kwargs...)
+    return _analyze(input, method, IndexSelector(neuron_selection); kwargs...)
 end
 
 function analyze(input::AbstractArray{<:Real}, method::AbstractXAIMethod; kwargs...)
-    return method(input, MaxActivationNS(); kwargs...)
+    return _analyze(input, method, MaxActivationSelector(); kwargs...)
+end
+
+function (method::AbstractXAIMethod)(
+    input::AbstractArray{<:Real},
+    neuron_selection::Union{Integer,Tuple{<:Integer}};
+    kwargs...,
+)
+    return _analyze(input, method, IndexSelector(neuron_selection); kwargs...)
 end
 function (method::AbstractXAIMethod)(input::AbstractArray{<:Real}; kwargs...)
-    return method(input, MaxActivationNS(); kwargs...)
+    return _analyze(input, method, MaxActivationSelector(); kwargs...)
+end
+
+# lower-level call to method
+function _analyze(
+    input::AbstractArray{T,N},
+    method::AbstractXAIMethod,
+    sel::AbstractNeuronSelector;
+    add_batch_dim::Bool=false,
+    kwargs...,
+) where {T<:Real,N}
+    if add_batch_dim
+        return method(batch_dim_view(input), sel; kwargs...)
+    end
+    N < 2 && throw(BATCHDIM_MISSING)
+    return method(input, sel; kwargs...)
 end
 
+# for convenience, the analyzer can be called directly
+
 # Explanations and outputs are returned in a wrapper.
 # Metadata such as the analyzer allows dispatching on functions like `heatmap`.
-struct Explanation{A,O,L}
+struct Explanation{A,O,I,L}
     attribution::A
     output::O
-    neuron_selection::Int
+    neuron_selection::I
     analyzer::Symbol
     layerwise_relevances::L
 end
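A hedged usage sketch of the batched `analyze` API introduced above (the model and input sizes are made up; `Gradient` is used as the analyzer):

```julia
using Flux, ExplainableAI

model = Chain(Dense(10, 5, relu), Dense(5, 3))
x = rand(Float32, 10, 16)              # 10 features, batch of 16

expl = analyze(x, Gradient(model))     # max-activation neuron per sample
expl2 = analyze(x, Gradient(model), 2) # neuron 2 for every sample
# expl.attribution has the same size as x

v = rand(Float32, 10)                  # a single 1D sample
# analyze(v, Gradient(model))          # would throw BATCHDIM_MISSING
expl3 = analyze(v, Gradient(model); add_batch_dim=true)  # non-allocating batch-dim view
```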

src/flux.jl

Lines changed: 2 additions & 2 deletions
@@ -35,12 +35,12 @@ has_output_softmax(x) = is_softmax(x)
 has_output_softmax(model::Chain) = has_output_softmax(model[end])
 
 """
-    check_ouput_softmax(model)
+    check_output_softmax(model)
 
 Check whether model has softmax activation on output.
 Return the model if it doesn't, throw error otherwise.
 """
-function check_ouput_softmax(model::Chain)
+function check_output_softmax(model::Chain)
     if has_output_softmax(model)
         throw(ArgumentError("""Model contains softmax activation on output.
         Call `strip_softmax` on your model first."""))
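A brief sketch of the check documented above (assuming the package's exported `strip_softmax`; the model is made up): analyzers reject models whose output layer is a softmax.

```julia
using Flux, ExplainableAI

m = Chain(Dense(4, 3), softmax)
# Gradient(m)                           # throws: softmax activation on output
analyzer = Gradient(strip_softmax(m))   # remove the softmax first, then analyze
```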

src/gradient.jl

Lines changed: 20 additions & 10 deletions
@@ -1,5 +1,15 @@
-function gradient_wrt_input(model, input::T, output_neuron)::T where {T}
-    return only(gradient((in) -> model(in)[output_neuron], input))
+function gradient_wrt_input(model, input::T, output_indices) where {T}
+    return only(gradient((in) -> model(in)[output_indices], input))
+end
+
+function gradients_wrt_batch(model, input::AbstractArray{T,N}, output_indices) where {T,N}
+    # To avoid computing a sparse jacobian, we compute individual gradients
+    # by mapping `gradient_wrt_input` on slices of the input along the batch dimension.
+    return mapreduce(
+        (gs...) -> cat(gs...; dims=N), zip(eachslice(input; dims=N), output_indices)
+    ) do (in, idx)
+        gradient_wrt_input(model, batch_dim_view(in), drop_batch_dim(idx))
+    end
 end
 
 """
@@ -9,13 +19,13 @@ Analyze model by calculating the gradient of a neuron activation with respect to
 """
 struct Gradient{C<:Chain} <: AbstractXAIMethod
     model::C
-    Gradient(model::Chain) = new{typeof(model)}(Flux.testmode!(check_ouput_softmax(model)))
+    Gradient(model::Chain) = new{typeof(model)}(Flux.testmode!(check_output_softmax(model)))
 end
 function (analyzer::Gradient)(input, ns::AbstractNeuronSelector)
     output = analyzer.model(input)
-    output_neuron = ns(output)
-    grad = gradient_wrt_input(analyzer.model, input, output_neuron)
-    return Explanation(grad, output, output_neuron, :Gradient, Nothing)
+    output_indices = ns(output)
+    grad = gradients_wrt_batch(analyzer.model, input, output_indices)
+    return Explanation(grad, output, output_indices, :Gradient, Nothing)
 end
 
 """
@@ -27,12 +37,12 @@ This gradient is then multiplied element-wise with the input.
 struct InputTimesGradient{C<:Chain} <: AbstractXAIMethod
     model::C
     function InputTimesGradient(model::Chain)
-        return new{typeof(model)}(Flux.testmode!(check_ouput_softmax(model)))
+        return new{typeof(model)}(Flux.testmode!(check_output_softmax(model)))
     end
 end
 function (analyzer::InputTimesGradient)(input, ns::AbstractNeuronSelector)
     output = analyzer.model(input)
-    output_neuron = ns(output)
-    attr = input .* gradient_wrt_input(analyzer.model, input, output_neuron)
-    return Explanation(attr, output, output_neuron, :InputTimesGradient, Nothing)
+    output_indices = ns(output)
+    attr = input .* gradients_wrt_batch(analyzer.model, input, output_indices)
+    return Explanation(attr, output, output_indices, :InputTimesGradient, Nothing)
 end
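To illustrate the pattern `gradients_wrt_batch` relies on, here is a minimal, self-contained sketch (Flux and Zygote assumed; `toy_model`, the 2×3 input, and the plain integer indices are made up — the package itself uses `CartesianIndex` selectors and the `batch_dim_view`/`drop_batch_dim` helpers):

```julia
using Flux, Zygote

toy_model = Chain(Dense(2, 4, relu), Dense(4, 3))
x = randn(Float32, 2, 3)                         # 2 features, batch of 3
idx = [argmax(toy_model(x)[:, b]) for b in 1:3]  # selected output neuron per sample

# One gradient per sample, concatenated along the batch dimension,
# instead of one sparse Jacobian over the whole batch:
grads = mapreduce((gs...) -> cat(gs...; dims=2), enumerate(eachcol(x))) do (b, xb)
    only(gradient(in -> toy_model(in)[idx[b]], xb))
end
size(grads) == size(x)  # true
```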

src/heatmap.jl

Lines changed: 33 additions & 24 deletions
@@ -2,9 +2,9 @@
 
 const HEATMAPPING_PRESETS = Dict{Symbol,Tuple{ColorScheme,Symbol,Symbol}}(
     # Analyzer => (colorscheme, reduce, normalize)
-    :LRP => (ColorSchemes.bwr, :sum, :centered),
-    :InputTimesGradient => (ColorSchemes.bwr, :sum, :centered), # same as LRP
-    :Gradient => (ColorSchemes.grays, :norm, :extrema),
+    :LRP => (ColorSchemes.bwr, :sum, :centered), # attribution
+    :InputTimesGradient => (ColorSchemes.bwr, :sum, :centered), # attribution
+    :Gradient => (ColorSchemes.grays, :norm, :extrema), # gradient
 )
 
 """
@@ -34,35 +34,32 @@ Assumes Flux's WHCN convention (width, height, color channels, batch size).
 When calling `heatmap` with an `Explanation` or analyzer, the method default is selected.
 When calling `heatmap` with an array, the default for use with the `bwr` colorscheme is `:centered`.
 - `permute::Bool`: Whether to flip W&H input channels. Default is `true`.
+- `unpack_singleton::Bool`: When heatmapping a batch with a single sample, setting `unpack_singleton=true`
+  will return an image instead of a Vector containing a single image.
 
 **Note:** these keyword arguments can't be used when calling `heatmap` with an analyzer.
 """
 function heatmap(
-    attr::AbstractArray;
+    attr::AbstractArray{T,N};
     cs::ColorScheme=ColorSchemes.bwr,
     reduce::Symbol=:sum,
     normalize::Symbol=:centered,
     permute::Bool=true,
-)
-    _size = size(attr)
-    length(_size) != 4 && throw(
+    unpack_singleton::Bool=true,
+) where {T,N}
+    N != 4 && throw(
         DomainError(
-            _size,
+            N,
             """heatmap assumes Flux's WHCN convention (width, height, color channels, batch size) for the input.
             Please reshape your attribution to match this format if your model doesn't adhere to this convention.""",
         ),
     )
-    _size[end] != 1 && throw(
-        DomainError(
-            _size[end],
-            """heatmap is only applicable to a single attribution, got a batch dimension of $(_size[end]).""",
-        ),
-    )
-
-    img = _normalize(dropdims(_reduce(dropdims(attr; dims=4), reduce); dims=3), normalize)
-    permute && (img = permutedims(img))
-    return ColorSchemes.get(cs, img)
+    if unpack_singleton && size(attr, 4) == 1
+        return _heatmap(attr[:, :, :, 1], cs, reduce, normalize, permute)
+    end
+    return map(a -> _heatmap(a, cs, reduce, normalize, permute), eachslice(attr; dims=4))
 end
+
 # Use HEATMAPPING_PRESETS for default kwargs when dispatching on Explanation
 function heatmap(expl::Explanation; permute::Bool=true, kwargs...)
     _cs, _reduce, _normalize = HEATMAPPING_PRESETS[expl.analyzer]
@@ -79,6 +76,18 @@ function heatmap(input, analyzer::AbstractXAIMethod, args...; kwargs...)
     return heatmap(analyze(input, analyzer, args...; kwargs...))
 end
 
+# Lower level function that is mapped along batch dimension
+function _heatmap(
+    attr::AbstractArray{T,3},
+    cs::ColorScheme,
+    reduce::Symbol,
+    normalize::Symbol,
+    permute::Bool,
+) where {T<:Real}
+    img = _normalize(dropdims(_reduce(attr, reduce); dims=3), normalize)
+    permute && (img = permutedims(img))
+    return ColorSchemes.get(cs, img)
+end
 
 # Normalize activations across pixels
 function _normalize(attr, method::Symbol)
@@ -97,15 +106,15 @@ function _normalize(attr, method::Symbol)
 end
 
 # Reduce attributions across color channels into a single scalar – assumes WHCN convention
-function _reduce(attr::T, method::Symbol) where {T}
-    if size(attr, 3) == 1 # nothing need to reduce
+function _reduce(attr::AbstractArray{T,3}, method::Symbol) where {T}
+    if size(attr, 3) == 1 # nothing to reduce
         return attr
+    elseif method == :sum
+        return reduce(+, attr; dims=3)
     elseif method == :maxabs
-        return maximum(abs, attr; dims=3)
+        return reduce((c...) -> maximum(abs.(c)), attr; dims=3, init=zero(T))
     elseif method == :norm
-        return mapslices(norm, attr; dims=3)::T
-    elseif method == :sum
-        return sum(attr; dims=3)
+        return reduce((c...) -> sqrt(sum(c .^ 2)), attr; dims=3, init=zero(T))
     end
     throw(
         ArgumentError(
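A hedged sketch of the batched `heatmap` behavior described in the docstring above (the 8×8×3×4 WHCN attribution array is made up; the resulting colors depend on the chosen scheme):

```julia
using ExplainableAI

attr = randn(Float32, 8, 8, 3, 4)      # width × height × channels × batch
imgs = heatmap(attr)                   # Vector of 4 images, one per sample
one_img = heatmap(attr[:, :, :, 1:1])  # singleton batch is unpacked to a single image
as_vec = heatmap(attr[:, :, :, 1:1]; unpack_singleton=false)  # Vector with one image
```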

src/lrp.jl

Lines changed: 4 additions & 4 deletions
@@ -25,7 +25,7 @@ struct LRP{R<:AbstractVector{<:AbstractLRPRule}} <: AbstractXAIMethod
     )
         model = flatten_model(model)
         if !skip_checks
-            check_ouput_softmax(model)
+            check_output_softmax(model)
             check_model(Val(:LRP), model; verbose=verbose)
         end
         if length(model.layers) != length(rules)
@@ -59,9 +59,9 @@ function (analyzer::LRP)(
     rels = similar.(acts)
 
     # Mask output neuron
-    output_neuron = ns(acts[end])
+    output_indices = ns(acts[end])
     rels[end] .= zero(T)
-    rels[end][output_neuron] = acts[end][output_neuron]
+    rels[end][output_indices] = acts[end][output_indices]
 
     # Backward pass through layers, applying LRP rules
     for (i, rule) in Iterators.reverse(enumerate(analyzer.rules))
@@ -71,7 +71,7 @@ function (analyzer::LRP)(
     return Explanation(
         first(rels),
         last(acts),
-        output_neuron,
+        output_indices,
         :LRP,
         ifelse(layerwise_relevances, rels, Nothing),
     )
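For intuition, a small sketch (with a made-up 4×2 output batch and hand-picked indices) of the masking step above: the output-layer relevance is zeroed and only the selected neuron of each sample keeps its activation.

```julia
acts_end = randn(Float32, 4, 2)                      # output activations, batch of 2
output_indices = [CartesianIndex(3, 1), CartesianIndex(1, 2)]  # one neuron per sample
rels_end = similar(acts_end)
rels_end .= 0
rels_end[output_indices] = acts_end[output_indices]  # relevance only at selected neurons
```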

src/lrp_checks.jl

Lines changed: 0 additions & 1 deletion
@@ -114,5 +114,4 @@ function check_model(::Val{:LRP}, c::Chain; verbose=true)
         )
         throw(ArgumentError("Unknown or unsupported activation functions found in model"))
     end
-    return false
 end

src/lrp_rules.jl

Lines changed: 4 additions & 24 deletions
@@ -1,27 +1,7 @@
-# Generic implementation of LRP according to [1, 2].
-# LRP-rules are implemented as structs of type `AbstractLRPRule`.
-# Through the magic of multiple dispatch, rule modifications such as LRP-γ and -ϵ
-# can be implemented by dispatching on the functions `modify_params` & `modify_denominator`,
-# which make use of the generalized LRP implementation shown in [1].
-#
-# If the relevance propagation falls outside of this scheme, custom low-level functions
-# ```julia
-# lrp!(::MyLRPRule, layer, Rₖ, aₖ, Rₖ₊₁) = ...
-# lrp!(::MyLRPRule, layer::MyLayer, Rₖ, aₖ, Rₖ₊₁) = ...
-# lrp!(::AbstractLRPRule, layer::MyLayer, Rₖ, aₖ, Rₖ₊₁) = ...
-# ```
-# that inplace-update `Rₖ` can be implemented.
-# This is used for the ZBoxRule and for faster computations on common layers.
-#
-# References:
-# [1] G. Montavon et al., Layer-Wise Relevance Propagation: An Overview
-# [2] W. Samek et al., Explaining Deep Neural Networks and Beyond: A Review of Methods and Applications
-
+# https://adrhill.github.io/ExplainableAI.jl/stable/generated/advanced_lrp/#How-it-works-internally
 abstract type AbstractLRPRule end
 
-# This is the generic relevance propagation rule which is used for the 0, γ and ϵ rules.
-# It can be extended for new rules via `modify_denominator` and `modify_params`.
-# Since it uses autodiff, it is used as a fallback for layer types without custom implementation.
+# Generic LRP rule. Since it uses autodiff, it is used as a fallback for layer types without custom implementation.
 function lrp!(rule::R, layer::L, Rₖ, aₖ, Rₖ₊₁) where {R<:AbstractLRPRule,L}
     lrp_autodiff!(rule, layer, Rₖ, aₖ, Rₖ₊₁)
     return nothing
@@ -50,8 +30,8 @@ end
 
 function lrp_dense!(rule::R, l, Rₖ, aₖ, Rₖ₊₁) where {R<:AbstractLRPRule}
     ρW, ρb = modify_params(rule, get_params(l)...)
-    ãₖ₊₁ = modify_denominator(rule, ρW * aₖ + ρb)
-    @tullio Rₖ[j] = aₖ[j] * ρW[k, j] / ãₖ₊₁[k] * Rₖ₊₁[k]
+    ãₖ₊₁ = modify_denominator(rule, ρW * aₖ .+ ρb)
+    @tullio Rₖ[j, b] = aₖ[j, b] * ρW[k, j] / ãₖ₊₁[k, b] * Rₖ₊₁[k, b]
     return nothing
 end
 
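The batched `@tullio` expression above sums over the output index `k` for every (input neuron `j`, batch sample `b`) pair. A plain-loop sketch of the same computation (made-up weights, bias, and activations; `modify_params`/`modify_denominator` are taken as the identity, as in the LRP-0 rule):

```julia
W = randn(Float32, 3, 4)      # k × j
bias = randn(Float32, 3)
aₖ = randn(Float32, 4, 2)     # j × batch
Rₖ₊₁ = randn(Float32, 3, 2)   # k × batch
ãₖ₊₁ = W * aₖ .+ bias         # k × batch, bias broadcast over the batch

Rₖ = zeros(Float32, size(aₖ))
for b in axes(aₖ, 2), j in axes(aₖ, 1), k in axes(Rₖ₊₁, 1)
    Rₖ[j, b] += aₖ[j, b] * W[k, j] / ãₖ₊₁[k, b] * Rₖ₊₁[k, b]
end
```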

src/neuron_selection.jl

Lines changed: 17 additions & 9 deletions
@@ -1,21 +1,29 @@
 abstract type AbstractNeuronSelector end
-(ns::AbstractNeuronSelector)(output::AbstractArray) = ns(drop_singleton_dims(output))
 
 """
-    MaxActivationNS()
+    MaxActivationSelector()
 
 Neuron selector that picks the output neuron with the highest activation.
 """
-struct MaxActivationNS <: AbstractNeuronSelector end
-(::MaxActivationNS)(output::AbstractVector) = argmax(output)
+struct MaxActivationSelector <: AbstractNeuronSelector end
+function (::MaxActivationSelector)(out::AbstractArray{T,N}) where {T,N}
+    N < 2 && throw(BATCHDIM_MISSING)
+    return Vector{CartesianIndex{N}}([argmax(out; dims=1:(N - 1))...])
+end
 
 """
-    IndexNS(index)
+    IndexSelector(index)
 
 Neuron selector that picks the output neuron at the given index.
 """
-struct IndexNS{T} <: AbstractNeuronSelector
-    index::T
-    IndexNS(index::Integer) = new{typeof(index)}(index)
+struct IndexSelector{I} <: AbstractNeuronSelector
+    index::I
+end
+function (s::IndexSelector{<:Integer})(out::AbstractArray{T,N}) where {T,N}
+    N < 2 && throw(BATCHDIM_MISSING)
+    return CartesianIndex{N}.(s.index, 1:size(out, N))
+end
+function (s::IndexSelector{I})(out::AbstractArray{T,N}) where {I,T,N}
+    N < 2 && throw(BATCHDIM_MISSING)
+    return CartesianIndex{N}.(s.index..., 1:size(out, N))
 end
-(ns::IndexNS)(output::AbstractVector) = ns.index
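A short sketch of what the new selectors return on a batched output (the 3×2 matrix is made up): both produce one `CartesianIndex` per sample, i.e. per column.

```julia
out = Float32[0.1 0.9; 0.7 0.2; 0.2 0.4]  # 3 classes × batch of 2

# Max activation per column, similar to what MaxActivationSelector computes:
vec(argmax(out; dims=1))                  # CartesianIndex(2, 1), CartesianIndex(1, 2)

# Fixed index per column, as in IndexSelector(3):
CartesianIndex{2}.(3, 1:size(out, 2))     # CartesianIndex(3, 1), CartesianIndex(3, 2)
```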
