diff --git a/Project.toml b/Project.toml index 3ded779..93bc9f6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "PointProcesses" uuid = "af0b7596-9bb0-472a-a012-63904f2b4c55" -version = "0.6.0" +version = "0.7.0" authors = ["Guillaume Dalle", "José Kling", "Julien Chevallier", "Ryan Senne"] [deps] diff --git a/docs/examples/Hawkes.jl b/docs/examples/Hawkes.jl index f5f90e7..1f22523 100644 --- a/docs/examples/Hawkes.jl +++ b/docs/examples/Hawkes.jl @@ -94,10 +94,16 @@ plot(p1, p2, p3; layout=(3, 1), size=(800, 600)) # in PointProcesses.jl to fit a piecewise constant intensity function to the data. This will give us an idea of the average litter box usage over the course of a day. # While this will not give us any information about self-exciting behavior, it will help us understand the daily patterns of litter box usage. -tod = minute.(data.TimestampDT) .+ 60 .* hour.(data.TimestampDT) # time-of-day in minutes since midnight +tod_raw = Float64.(minute.(data.TimestampDT) .+ 60 .* hour.(data.TimestampDT)) # time-of-day in minutes since midnight day = Date.(data.TimestampDT) n_days = length(unique(day)) -h_day = History(sort(Float64.(tod)), 0.0, 1440.0) # build a "history" on [0, 1440] minutes +# Collapsing many days into a single [0, 1440) window produces events at the +# same minute-of-day across different days. Add a tiny deterministic jitter +# (≪ the 15-minute bin width below) so the events are strictly ordered for +# `History`. The piecewise-constant fit aggregates into bins, so the jitter is +# invisible to the resulting intensity. +tod = sort(tod_raw .+ (1:length(tod_raw)) .* 1e-6) +h_day = History(tod, 0.0, 1440.0) # build a "history" on [0, 1440] minutes nbins = 96 # 96 bins = 15-minute bins pp_day = fit( InhomogeneousPoissonProcess{PiecewiseConstantIntensity{Float64},Dirac{Nothing}}, @@ -121,7 +127,7 @@ plot( # Now, let's fit a Hawkes process to the data. We will for now, ignore the cat identities and fit a single Hawkes process to all the data. # The goal of this analysis is to understand the self-exciting nature of the litter box entries. I.e., if one cat uses the litter box, # does that increase the likelihood of another cat using it soon after? To do this, we can use the implementation in PointProcesses.jl -full_history = History(data.t, 0.0, maximum(data.t) + 1.0) +full_history = History(sort(data.t), 0.0, maximum(data.t) + 1.0) hawkes_model = fit(HawkesProcess, full_history) println("Fitted Hawkes Process Parameters:") # hide diff --git a/docs/src/examples/Hawkes.md b/docs/src/examples/Hawkes.md index d680a98..6af2ed0 100644 --- a/docs/src/examples/Hawkes.md +++ b/docs/src/examples/Hawkes.md @@ -15,9 +15,9 @@ where μ(t) is the baseline intensity, which accounts for spontaneous events, `` which controls how past events influence future events. One of the most common choices for this kernel is the exponential function: ``\phi(s) = \alpha\exp{-\beta s}``. In this model, α is the strength of self-exciting, and β controls the rate of excitation decay. -Given these, parameters, the intensity function is: ``λ(t|\mathcal H_t) = μ + \sum_{t_i < t} \alpha \exp{-\beta (t - t_i)}``. We assume that μ is constant for simplicity. But, we could generalzie this to be inhomogeneous. +Given these parameters, the intensity function is: ``λ(t|\mathcal H_t) = μ + \sum_{t_i < t} \alpha \exp{-\beta (t - t_i)}``. We assume that μ is constant for simplicity. But, we could generalize this to be inhomogeneous. -An important statistic of the Hawkes proces, is the branching ratio which is a measure of the expected number of "daughter" events, a "parent" event will create. For the exponential kernel, this is given by the following equation: +An important statistic of the Hawkes process is the branching ratio which is a measure of the expected number of "daughter" events, a "parent" event will create. For the exponential kernel, this is given by the following equation: ``n = \frac{\alpha}{\beta}``. In the event that n < 1, the process is subcritical, meaning that events will eventually die out. If n = 1, the process is critical, and if n > 1, the process is supercritical, meaning that events can lead to an infinite cascade of events. Let's now apply this theory, and develop more through the process, by fitting a Hawkes process to some data. @@ -36,7 +36,8 @@ First let's open our data. This data records litter box entries taken from three ````@example Hawkes root = pkgdir(PointProcesses) -data = CSV.read(joinpath(root, "docs", "examples", "data", "cats.csv"), DataFrame) +data = CSV.read(joinpath(root, "docs", "examples", "data", "cats.csv"), DataFrame); +nothing #hide ```` This dataset has three cats in it, so let's first separate out each cat, using their weight to infer their identity @@ -46,7 +47,8 @@ cat_weights = [parse(Float64, split(i)[1]) for i in data.Value] clusters = kmeans(reshape(cat_weights, 1, :), 3; maxiter=100, display=:none) data.CatWeight = cat_weights -data.CatID = clusters.assignments +data.CatID = clusters.assignments; +nothing #hide ```` Now that we have set this up we need to get the timestamps in a useable order for plotting. We can use the Dates package for this. @@ -70,7 +72,9 @@ Since this dataset only has resolution up to the nearest minute, we need to chec ````@example Hawkes if any(diff(sort(data.t)) .== 0) - println("Data contains tied event times. Please add small jitter to event times to make them unique.") + println( + "Data contains tied event times. Please add small jitter to event times to make them unique.", + ) else println("Data contains no tied event times. Proceed with fitting. Yay!") end @@ -79,8 +83,23 @@ end Great! We can now move forward. First, let's visualize the data using a simple event plot. ````@example Hawkes -function eventplot(event_times::Vector{Float64}; title="Event Plot", xlabel="Time (minutes)", ylabel="Events") - scatter(event_times, ones(length(event_times)); markershape=:vline, markersize=10, label="", title=title, xlabel=xlabel, ylabel=ylabel, yticks=false) +function eventplot( + event_times::Vector{Float64}; + title="Event Plot", + xlabel="Time (minutes)", + ylabel="Events", +) + scatter( + event_times, + ones(length(event_times)); + markershape=:vline, + markersize=10, + label="", + title=title, + xlabel=xlabel, + ylabel=ylabel, + yticks=false, + ) end cat1_times = data.t[data.CatID .== 1] @@ -100,10 +119,20 @@ in PointProcesses.jl to fit a piecewise constant intensity function to the data. While this will not give us any information about self-exciting behavior, it will help us understand the daily patterns of litter box usage. ````@example Hawkes -tod = minute.(data.TimestampDT) .+ 60 .* hour.(data.TimestampDT) # time-of-day in minutes since midnight +tod_raw = Float64.(minute.(data.TimestampDT) .+ 60 .* hour.(data.TimestampDT)) # time-of-day in minutes since midnight day = Date.(data.TimestampDT) n_days = length(unique(day)) -h_day = History(sort(Float64.(tod)), 0.0, 1440.0) # build a "history" on [0, 1440] minutes +```` + +Collapsing many days into a single [0, 1440) window produces events at the +same minute-of-day across different days. Add a tiny deterministic jitter +(≪ the 15-minute bin width below) so the events are strictly ordered for +`History`. The piecewise-constant fit aggregates into bins, so the jitter is +invisible to the resulting intensity. + +````@example Hawkes +tod = sort(tod_raw .+ (1:length(tod_raw)) .* 1e-6) +h_day = History(tod, 0.0, 1440.0) # build a "history" on [0, 1440] minutes nbins = 96 # 96 bins = 15-minute bins pp_day = fit( InhomogeneousPoissonProcess{PiecewiseConstantIntensity{Float64},Dirac{Nothing}}, @@ -130,11 +159,8 @@ The goal of this analysis is to understand the self-exciting nature of the litte does that increase the likelihood of another cat using it soon after? To do this, we can use the implementation in PointProcesses.jl ````@example Hawkes -full_history = History(data.t, 0.0, maximum(data.t) + 1.0) -hawkes_model = fit( - HawkesProcess, - full_history, -) +full_history = History(sort(data.t), 0.0, maximum(data.t) + 1.0) +hawkes_model = fit(HawkesProcess, full_history) println("Fitted Hawkes Process Parameters:") # hide println("Base intensity (μ): ", hawkes_model.μ) # hide @@ -153,17 +179,21 @@ Finally, we can visualize the fitted intensity function over time. ````@example Hawkes ts = sort(data.t) -λ_hawkes(t::Real) = +function λ_hawkes(t::Real) hawkes_model.μ + sum((hawkes_model.α * exp(-hawkes_model.ω * (t - ti)) for ti in ts if ti < t); init=0.0) +end u = range(0.0, maximum(ts) + 1.0; length=2000) -plot(u, λ_hawkes.(u); - xlabel="Time (minutes)", - ylabel="Fitted Hawkes intensity (events/min)", - title="Fitted Hawkes Process Intensity Function", - legend=false) +plot( + u, + λ_hawkes.(u); + xlabel="Time (minutes)", + ylabel="Fitted Hawkes intensity (events/min)", + title="Fitted Hawkes Process Intensity Function", + legend=false, +) ```` From the plot, we can observe how the intensity function varies over time, capturing the self-exciting nature of the litter box entries. @@ -176,11 +206,15 @@ test = MonteCarloTest(KSDistance{Exponential}, hawkes_model, full_history; n_sim p = pvalue(test) # hide println("Monte Carlo Test p-value for Hawkes Process fit: ", p) # hide -println("Assuming a significance level of 0.05, we " * (p < 0.05 ? "reject" : "fail to reject") * "",) # hide +println( + "Assuming a significance level of 0.05, we " * + (p < 0.05 ? "reject" : "fail to reject") * + "", +) # hide println("the null hypothesis that the Hawkes process is a good fit to the data.") # hide ```` -From this analysis, it seems that we can can say that litter-box usage is indeed a self-exciting process, as the fitted Hawkes process provides a good fit to the data. +From this analysis, it seems that we can say that litter-box usage is indeed a self-exciting process, as the fitted Hawkes process provides a good fit to the data. It's worth considering that we have ignored the identities of the cats in this analysis. A more thorough analysis could involve fitting a multivariate Hawkes process, where each cat is represented as a separate dimension. This would allow us to capture the interactions between the cats more accurately. This will be the subject of a future tutorial. diff --git a/docs/src/examples/Inhomogeneous.md b/docs/src/examples/Inhomogeneous.md index c0b8a08..901d21b 100644 --- a/docs/src/examples/Inhomogeneous.md +++ b/docs/src/examples/Inhomogeneous.md @@ -266,6 +266,29 @@ scatter!( ) ```` +## Goodness-of-Fit via Time Rescaling +By the time-rescaling theorem, if a fitted intensity ``\hat{λ}(t)`` is correct, then the +events transformed by the compensator ``Λ(t) = ∫_{t_{\min}}^{t} \hat{λ}(s) ds`` form a +unit-rate Poisson process. We can use `MonteCarloTest` with `KSDistance{Exponential}` to +check whether the transformed inter-event times look Exp(1): + +````@example Inhomogeneous +pp_poly_full = InhomogeneousPoissonProcess(pp_poly) +pp_gauss_full = InhomogeneousPoissonProcess(pp_gauss) + +mc_piecewise = MonteCarloTest(KSDistance{Exponential}, pp_piecewise, h; n_sims=500, rng=rng) +mc_poly = MonteCarloTest(KSDistance{Exponential}, pp_poly_full, h; n_sims=500, rng=rng) +mc_gauss = MonteCarloTest(KSDistance{Exponential}, pp_gauss_full, h; n_sims=500, rng=rng) + +println("\nGoodness-of-fit p-values (Monte Carlo KS test):") # hide +println(" Piecewise Constant: ", round(pvalue(mc_piecewise); digits=3)) # hide +println(" Polynomial: ", round(pvalue(mc_poly); digits=3)) # hide +println(" Gaussian (Custom): ", round(pvalue(mc_gauss); digits=3)) # hide +```` + +A small p-value provides evidence against the null hypothesis that the model captures the +observed firing pattern, while a large p-value indicates the model is consistent with the data. + ## Model Comparison Let's compare all models by computing their negative log-likelihoods: diff --git a/src/history.jl b/src/history.jl index 3502d96..af73102 100644 --- a/src/history.jl +++ b/src/history.jl @@ -11,6 +11,23 @@ Linear event histories with temporal locations of type `T` and marks of type `M` - `marks::Vector{M}`: associated vector of event marks - `dims::Vector{D}`: associated vector of event dimensions - `N::Int`: number of dimensions + +# Construction + +The constructor validates its inputs and throws on any violation rather than +silently coercing them. With `check_args=true` (the default), it requires: + +- `tmin < tmax` +- `length(times) == length(marks) == length(dims)` +- `issorted(times)` +- every event time lies in the half-open interval `[tmin, tmax)` +- no two events share a time *within the same dimension* (equal times across + different dimensions are allowed, representing simultaneous events on + different components of a multivariate process) +- if `dims` is provided, every value lies in `1:N` + +Pass `check_args=false` to skip validation when the caller can guarantee the +invariants hold (e.g. for performance in inner simulation loops). """ struct History{T<:Real,M,D} times::Vector{T} @@ -46,17 +63,39 @@ struct History{T<:Real,M,D} ) end if !isempty(times) - perm = sortperm(times) - times .= times[perm] - marks .= marks[perm] - dims .= dims[perm] + if !issorted(times) + throw( + DomainError( + times, "Event times must be sorted in non-decreasing order." + ), + ) + end if times[1] < tmin || times[end] >= tmax - @warn "Events outside of provided interval were discarded." - il = searchsortedfirst(times, tmin) - ir = searchsortedfirst(times, tmax) - 1 - times = times[il:ir] - marks = marks[il:ir] - dims = dims[il:ir] + throw( + DomainError( + (times[1], times[end], tmin, tmax), + "All event times must lie in the half-open interval [tmin, tmax).", + ), + ) + end + # Repeated event times are allowed only across different + # dimensions (a simultaneous event in two components of a + # multivariate process). Within a single dimension, two events + # at the same time are pathological for a simple point process. + for i in 2:length(times) + times[i] == times[i - 1] || continue + j = i - 1 + while j >= 1 && times[j] == times[i] + if dims[j] == dims[i] + throw( + DomainError( + (times[i], dims[i]), + "Repeated event time within a single dimension.", + ), + ) + end + j -= 1 + end end end if N == 1 @@ -314,6 +353,12 @@ duration(h::History) = max_time(h) - min_time(h) push!(h, t, m) Add event `(t, m)` inside the interval `[h.tmin, h.tmax)` at the end of history `h`. + +With `check_args=true` (the default), the event must satisfy +`h.tmin <= t < h.tmax`, occur at or after the last existing event time, and +(if `h` is multivariate) lie in dimension `d ∈ 1:h.N`. Violations throw an +`AssertionError`. Pass `check_args=false` to skip these checks in trusted +inner loops. """ function Base.push!(h::History, t::Real, m=nothing, d=nothing; check_args=true) if check_args @@ -331,6 +376,13 @@ end append!(h, ts, ms) Append events `(ts, ms)` inside the interval `[h.tmin, h.tmax)` at the end of history `h`. + +With `check_args=true` (the default), `ts` must be sorted, lie in +`[h.tmin, h.tmax)`, start at or after the last existing event in `h`, and +have matching lengths with `ms` and `ds`. If `h` is multivariate, every +dimension in `ds` must be in `1:h.N`. Violations throw an `AssertionError`. +The caller's `ts`/`ms`/`ds` vectors are *not* mutated. Pass `check_args=false` +to skip these checks in trusted inner loops. """ function Base.append!( h::History, @@ -343,13 +395,10 @@ function Base.append!( return nothing end if check_args - perm = sortperm(ts) - ts .= ts[perm] - ms .= ms[perm] - ds .= ds[perm] + @assert length(ts) == length(ms) == length(ds) + @assert issorted(ts) @assert h.tmin <= ts[1] && ts[end] < h.tmax @assert isempty(h) || (h.times[end] <= ts[1]) - @assert length(ts) == length(ms) == length(ds) @assert (eltype(ds) == Nothing && h.N == 1) || all(1 .<= ds .<= h.N) end append!(h.times, ts) diff --git a/src/univariate/poisson/inhomogeneous/inhomogeneous_poisson_process.jl b/src/univariate/poisson/inhomogeneous/inhomogeneous_poisson_process.jl index bf87dab..254bfa3 100644 --- a/src/univariate/poisson/inhomogeneous/inhomogeneous_poisson_process.jl +++ b/src/univariate/poisson/inhomogeneous/inhomogeneous_poisson_process.jl @@ -122,18 +122,31 @@ The transformed event times form a unit-rate (homogeneous) Poisson process on function time_change(h::History, pp::InhomogeneousPoissonProcess) f = pp.intensity_function config = pp.integration_config - new_tmax = integrated_intensity(f, h.tmin, h.tmax, config) - T = typeof(new_tmax) - new_times = T[integrated_intensity(f, h.tmin, t, config) for t in h.times] - # `new_tmax` and each `new_times[i]` come from independent quadrature calls, - # so solver / floating-point error can both swap adjacent transformed - # events and let one of them slightly exceed `new_tmax`. Widen `new_tmax` - # to the observed maximum (with an `eps` cushion) so no event is dropped - # at the boundary; small order drift is then handled by the History - # constructor's automatic `sortperm` pass under `check_args=true`. - if !isempty(new_times) - new_tmax = max(new_tmax, maximum(new_times) * (one(T) + eps(T))) + edges = vcat(h.tmin, h.times, h.tmax) + deltas = [ + integrated_intensity(f, edges[i], edges[i + 1], config) for + i in 1:(length(edges) - 1) + ] + T = eltype(deltas) + i_neg = findfirst(<(zero(T)), deltas) + if i_neg !== nothing + throw( + DomainError( + deltas[i_neg], + "time_change: integrated intensity over " * + "[$(edges[i_neg]), $(edges[i_neg + 1])] is negative. " * + "This usually reflects floating-point jitter from adaptive " * + "quadrature on an interval where the true integral is near zero, " * + "or an `intensity_function` that returns negative values. " * + "Tightening the tolerance in `integration_config` may help if " * + "the true integral is non-negligible; otherwise verify that " * + "`intensity_function` is non-negative on the support of `h`.", + ), + ) end + cs = cumsum(deltas) + new_times = cs[1:(end - 1)] + new_tmax = cs[end] return History(; times=new_times, tmin=zero(T), tmax=new_tmax, marks=h.marks) end diff --git a/test/history.jl b/test/history.jl index d1264b7..189d20a 100644 --- a/test/history.jl +++ b/test/history.jl @@ -37,7 +37,7 @@ @test duration(h_exp) == exp(2.5) - exp(0.0) - append!(h2, [2.45, 2.4], ["g", "f"]) + append!(h2, [2.4, 2.45], ["f", "g"]) @test h2.times == [2.3, 2.4, 2.45] @test h2.marks == ["e", "f", "g"] @@ -45,9 +45,11 @@ @test isa(History(rand(3), 0, BigFloat(1)), History{BigFloat,Nothing}) @test_throws DomainError History(rand(3), 1, 0) @test_throws DimensionMismatch History(rand(3), 0, 1, ["a", "b"]) - @test_logs (:warn, "Events outside of provided interval were discarded.") History( - [0.1, 1.1], 0, 1 - ) + # Strict validation: events outside [tmin, tmax) must throw, not silently + # discard. Same for unsorted times and same-time-same-dim repeats. + @test_throws DomainError History([0.1, 1.1], 0, 1) + @test_throws DomainError History([0.2, 0.1, 0.3], 0.0, 1.0) + @test_throws DomainError History([0.1, 0.1], 0.0, 1.0) end @testset "Multivariate History" begin @@ -88,8 +90,11 @@ end # Test append! with dimensions @test_throws AssertionError append!(h_multi, [0.8, 0.9], ["f", "g"], [2, 1]) append!(h_multi, Float64[]) - append!(h_multi, [0.95, 0.9], ["f", "g"], [2, 1]) + append!(h_multi, [0.9, 0.95], ["g", "f"], [1, 2]) @test nb_events(h_multi) == 7 @test event_times(h_multi, 1) == [0.1, 0.5, 0.85, 0.9] @test event_marks(h_multi, 1) == ["a", "b", "e", "g"] + # append! now validates strictly: unsorted input must throw instead of + # being silently re-sorted (and silently mutating the caller's vectors). + @test_throws AssertionError append!(h_multi, [0.99, 0.97], ["x", "y"], [1, 2]) end diff --git a/test/inhomogeneous_poisson_process.jl b/test/inhomogeneous_poisson_process.jl index efc3d12..d5bb13e 100644 --- a/test/inhomogeneous_poisson_process.jl +++ b/test/inhomogeneous_poisson_process.jl @@ -77,6 +77,233 @@ rng = Random.seed!(12345) end end +@testset "Time change - non-trivial integrals" begin + # These tests exercise time_change against closed-form compensators for + # intensities where adaptive quadrature has to do real work (sinusoidal, + # exponential, mixed-frequency, multi-scale). Each test checks the + # transformed times against analytic Λ(t) values, not just structural + # invariants. + + @testset "Sinusoidal — Λ has closed form" begin + # λ(t) = a + b*sin(ω*t + φ) + # Λ(t) - Λ(0) = a*t - (b/ω)*(cos(ω*t + φ) - cos(φ)) + a, b, ω, φ = 5.0, 2.0, 2π, π / 6 + intensity = SinusoidalIntensity(a, b, ω, φ) + pp = InhomogeneousPoissonProcess(intensity) + + h = History([0.13, 0.47, 0.92, 1.5, 2.1, 2.8, 3.55], 0.0, 4.0) + Λ(t) = a * t - (b / ω) * (cos(ω * t + φ) - cos(φ)) + + h_transf = time_change(h, pp) + @test h_transf.tmin ≈ 0.0 + @test h_transf.tmax ≈ Λ(4.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + @test issorted(h_transf.times) + end + + @testset "Sinusoidal — large amplitude near zero crossings" begin + # b is close to a, so λ dips near zero between event times. Quadrature + # has to be accurate even where the integrand is small. + a, b, ω, φ = 3.0, 2.95, 2π, 0.0 + intensity = SinusoidalIntensity(a, b, ω, φ) + pp = InhomogeneousPoissonProcess(intensity) + + # event times bracketing zero-crossings of λ + h = History([0.2, 0.6, 0.8, 1.2, 1.7], 0.0, 2.0) + Λ(t) = a * t - (b / ω) * (cos(ω * t + φ) - cos(φ)) + + h_transf = time_change(h, pp) + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-5 + @test h_transf.tmax ≈ Λ(2.0) rtol = 1e-5 + @test issorted(h_transf.times) + end + + @testset "Exponential — increasing intensity" begin + # λ(t) = a*exp(b*t), Λ(t) - Λ(0) = (a/b)*(exp(b*t) - 1) + a, b = 2.0, 0.4 + intensity = ExponentialIntensity(a, b) + pp = InhomogeneousPoissonProcess(intensity) + + h = History([0.5, 1.2, 2.0, 2.9, 3.8], 0.0, 5.0) + Λ(t) = (a / b) * (exp(b * t) - 1) + + h_transf = time_change(h, pp) + @test h_transf.tmin ≈ 0.0 + @test h_transf.tmax ≈ Λ(5.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + @test issorted(h_transf.times) + end + + @testset "Exponential — decreasing intensity (tail is small)" begin + # λ(t) decays — late deltas can be very small without being zero. + a, b = 5.0, -0.3 + intensity = ExponentialIntensity(a, b) + pp = InhomogeneousPoissonProcess(intensity) + + h = History([0.1, 0.4, 1.0, 2.5, 5.0, 8.0], 0.0, 10.0) + Λ(t) = (a / b) * (exp(b * t) - 1) + + h_transf = time_change(h, pp) + @test h_transf.tmax ≈ Λ(10.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + # decay means inter-event deltas in transformed coordinates shrink + diffs = diff(h_transf.times) + @test all(diffs .> 0) + end + + @testset "Quadratic polynomial — non-zero tmin" begin + # λ(t) = 1 + 0.5*t + 0.1*t² + # Λ(t) - Λ(tmin) = (t - tmin) + 0.25*(t² - tmin²) + (0.1/3)*(t³ - tmin³) + intensity = PolynomialIntensity([1.0, 0.5, 0.1]) + pp = InhomogeneousPoissonProcess(intensity) + + tmin = 1.0 + h = History([1.5, 2.0, 3.5, 4.7, 5.9], tmin, 6.0) + Λ(t) = (t - tmin) + 0.25 * (t^2 - tmin^2) + (0.1 / 3) * (t^3 - tmin^3) + + h_transf = time_change(h, pp) + @test h_transf.tmin ≈ 0.0 + @test h_transf.tmax ≈ Λ(6.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + end + + @testset "Custom mixed-frequency intensity" begin + # λ(t) = 2 + cos(t)^2 + 0.1*t + # cos(t)^2 = (1 + cos(2t))/2 + # Λ(t) - Λ(0) = 2t + t/2 + sin(2t)/4 + 0.05*t² + intensity = t -> 2.0 + cos(t)^2 + 0.1 * t + pp = InhomogeneousPoissonProcess(intensity) + + h = History([0.3, 1.1, 2.4, 4.2, 6.0, 7.5], 0.0, 8.0) + Λ(t) = 2 * t + t / 2 + sin(2 * t) / 4 + 0.05 * t^2 + + h_transf = time_change(h, pp) + @test h_transf.tmax ≈ Λ(8.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + end + + @testset "Multi-scale intensity (slow drift + sharp oscillation)" begin + # λ(t) = 1 + 0.05*t + 0.5*sin(20*t)^2 + # = 1 + 0.05*t + 0.25 - 0.25*cos(40*t) + # Λ(t) = 1.25*t + 0.025*t² - sin(40*t)/160 + intensity = t -> 1.0 + 0.05 * t + 0.5 * sin(20 * t)^2 + pp = InhomogeneousPoissonProcess(intensity) + + h = History(collect(0.1:0.3:4.9), 0.0, 5.0) + Λ(t) = 1.25 * t + 0.025 * t^2 - sin(40 * t) / 160 + + h_transf = time_change(h, pp) + @test h_transf.tmax ≈ Λ(5.0) rtol = 1e-4 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-4 + @test issorted(h_transf.times) + end + + @testset "PiecewiseConstantIntensity — exact compensator" begin + breakpoints = [0.0, 1.0, 2.5, 4.0, 6.0] + rates = [1.5, 0.8, 3.2, 2.1] + intensity = PiecewiseConstantIntensity(breakpoints, rates) + pp = InhomogeneousPoissonProcess(intensity) + + function Λ(t) + acc = 0.0 + for i in 1:(length(breakpoints) - 1) + lo, hi = breakpoints[i], breakpoints[i + 1] + if t <= lo + return acc + end + acc += rates[i] * (min(t, hi) - lo) + end + return acc + end + + h = History([0.5, 1.5, 2.7, 3.0, 4.8, 5.5], 0.0, 6.0) + h_transf = time_change(h, pp) + + @test h_transf.tmax ≈ Λ(6.0) rtol = 1e-6 + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + @test issorted(h_transf.times) + end + + @testset "Tightly clustered events" begin + # Events very close together stress the per-interval quadrature: each + # delta is tiny but must remain strictly positive. + intensity = PolynomialIntensity([2.0, 0.3]) + pp = InhomogeneousPoissonProcess(intensity) + + base = 1.0 + h = History([base, base + 1e-4, base + 2e-4, base + 3e-4], 0.0, 2.0) + Λ(t) = 2.0 * t + 0.15 * t^2 + + h_transf = time_change(h, pp) + @test h_transf.times ≈ [Λ(t) for t in h.times] rtol = 1e-6 + @test all(diff(h_transf.times) .> 0) + end + + @testset "Single event" begin + intensity = ExponentialIntensity(1.5, 0.2) + pp = InhomogeneousPoissonProcess(intensity) + + h = History([1.7], 0.0, 3.0) + Λ(t) = (1.5 / 0.2) * (exp(0.2 * t) - 1) + + h_transf = time_change(h, pp) + @test length(h_transf.times) == 1 + @test h_transf.times[1] ≈ Λ(1.7) rtol = 1e-6 + @test h_transf.tmax ≈ Λ(3.0) rtol = 1e-6 + end + + @testset "Long history with many events preserves monotonicity" begin + intensity = SinusoidalIntensity(8.0, 3.0, π, 0.0) + pp = InhomogeneousPoissonProcess(intensity) + + rng_local = Random.seed!(2718) + h = simulate(rng_local, pp, 0.0, 100.0) + @test nb_events(h) > 50 + + h_transf = time_change(h, pp) + @test h_transf.tmin ≈ 0.0 + @test issorted(h_transf.times) + @test all(diff(h_transf.times) .> 0) + @test h_transf.times[end] < h_transf.tmax + end + + @testset "Simulate-then-time-change yields unit-rate-like spacing" begin + # Under a correctly specified inhomogeneous PP, time-rescaled + # inter-event times should be approximately Exp(1), so their mean + # should be ≈ 1. + intensity = PolynomialIntensity([1.0, 0.4]) + pp = InhomogeneousPoissonProcess(intensity) + + rng_local = Random.seed!(31415) + h = simulate(rng_local, pp, 0.0, 200.0) + h_transf = time_change(h, pp) + + gaps = diff(vcat(h_transf.tmin, h_transf.times, h_transf.tmax)) + @test isapprox(mean(gaps), 1.0; atol=0.15) + end + + @testset "Negative integrated intensity throws DomainError" begin + # If the user supplies an intensity that returns negative values, + # time_change should surface a DomainError rather than silently + # producing non-monotone output. + bad_intensity = t -> -1.0 + 0.0 * t + pp_bad = InhomogeneousPoissonProcess(bad_intensity) + + h = History([0.5, 1.5, 2.5], 0.0, 3.0) + @test_throws DomainError time_change(h, pp_bad) + end + + @testset "Marks preserved across time change" begin + intensity = PolynomialIntensity([1.0, 0.5]) + pp = InhomogeneousPoissonProcess(intensity, Normal()) + + h = History([0.5, 1.5, 2.5, 3.5], 0.0, 4.0, [0.1, -0.3, 0.7, 1.2]) + h_transf = time_change(h, pp) + @test h_transf.marks == h.marks + @test length(h_transf.marks) == length(h_transf.times) + end +end + @testset "PolynomialIntensity" begin # Linear intensity: λ(t) = 1 + 0.5*t intensity_linear = PolynomialIntensity([1.0, 0.5])