diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1a2ec6a6a691..bacd176ea776 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -202,6 +202,8 @@ New Features
* GITHUB#15468: Add support for `@SuppressAssertingFormats` annotation for fine-grained control over `AssertingCodec` formats (Prudhvi Godithi)
+ * GITHUB#15518: Add support for post-collection faceting to the new faceting API in the sandbox module. (Egor Potemkin)
+
Improvements
---------------------
* GITHUB#15148: Add support uint8 distance and allow 8 bit scalar quantization (Trevor McCulloch)
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/PostCollectionFaceting.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/PostCollectionFaceting.java
new file mode 100644
index 000000000000..e11e267148c8
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/facet/utils/PostCollectionFaceting.java
@@ -0,0 +1,454 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.facet.utils;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.Executor;
+import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.LeafCollector;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.TaskExecutor;
+import org.apache.lucene.util.ArrayUtil;
+
+/**
+ * Performs post-collection faceting by replaying collected documents through drill-down and
+ * drill-sideways collectors. This enables parallel facet computation after initial document
+ * collection.
+ *
+ * <p>Normally, users can collect facets directly during search without needing FacetsCollector to
+ * store doc IDs. However, this class implements the second step of two-step collection: iterating
+ * over doc IDs already collected in FacetsCollector to compute facet results. This approach is
+ * useful when all matches must be known before computing facets, or when reusing the same matching
+ * documents to run faceting multiple times.
+ *
+ * @param <C> drill-down collector type
+ * @param <T> drill-down result type
+ * @param <K> drill-sideways collector type
+ * @param <R> drill-sideways result type
+ */
+public final class PostCollectionFaceting {
+ // TODO: is there more optimal slicing method or docs per slice limit?
+ // Threshold that collect() passes to getSlices(): a slice is closed once the leaves gathered
+ // for it total at least this many hits. Not private; the tests in this package reference it.
+ static final int MIN_DOCS_PER_SLICE = 100;
+
+ // Creates and reduces drill-down collectors. May be null, in which case collect() reports a
+ // null drill-down result.
+ private final CollectorManager drillDownCollectorManager;
+ // Drill-sideways managers kept for collection; list position is the value in dimToIndexMap.
+ private final List> drillSidewaysCollectorManagers;
+ // Source of the drill-down matching docs that get replayed.
+ private final FacetsCollector drillDownFacetsCollector;
+ // Dimension name to the FacetsCollector holding that dimension's drill-sideways matching docs.
+ private final Map drillSidewaysFacetsCollectors;
+ // Runs the per-slice collection tasks.
+ private final TaskExecutor taskExecutor;
+ // Dimension name to its index in drillSidewaysCollectorManagers.
+ private final Map dimToIndexMap;
+ // Highest leaf ordinal seen in any matching docs, plus one (see calculateNumOfIndexLeafs()).
+ private final int numOfIndexLeafs;
+
+ /**
+ * Creates a new PostCollectionFaceting instance.
+ *
+ * <p>Only dimensions present in both {@code drillSidewaysCollectorManagers} and {@code
+ * drillSidewaysFacetsCollectors} take part in collection; a dimension found in just one of the
+ * maps is ignored.
+ *
+ * @param drillDownCollectorManager collector manager for drill-down results, or null to skip
+ * drill-down collection (the drill-down result is then null)
+ * @param drillSidewaysCollectorManagers map of dimension names to collector managers for
+ * drill-sideways results, or null for none
+ * @param drillDownFacetsCollector facets collector containing drill-down matching documents
+ * @param drillSidewaysFacetsCollectors map of dimension names to facets collectors for
+ * drill-sideways, or null for none
+ * @param executor executor for parallel processing, or null for sequential execution
+ */
+ public PostCollectionFaceting(
+ CollectorManager drillDownCollectorManager,
+ Map> drillSidewaysCollectorManagers,
+ FacetsCollector drillDownFacetsCollector,
+ Map drillSidewaysFacetsCollectors,
+ Executor executor) {
+ this.drillDownCollectorManager = drillDownCollectorManager;
+ this.drillDownFacetsCollector = drillDownFacetsCollector;
+ // Normalize a null map to an empty one so later lookups need no null checks.
+ if (drillSidewaysFacetsCollectors == null) {
+ this.drillSidewaysFacetsCollectors = Map.of();
+ } else {
+ this.drillSidewaysFacetsCollectors = drillSidewaysFacetsCollectors;
+ }
+ // With no executor, tasks are run directly on the calling thread via Runnable::run.
+ if (executor == null) {
+ this.taskExecutor = new TaskExecutor(Runnable::run);
+ } else {
+ this.taskExecutor = new TaskExecutor(executor);
+ }
+ // Reads drillDownFacetsCollector and this.drillSidewaysFacetsCollectors, both assigned above.
+ this.numOfIndexLeafs = calculateNumOfIndexLeafs();
+ this.dimToIndexMap = new HashMap<>();
+ // Next free position in this.drillSidewaysCollectorManagers.
+ int ind = 0;
+ if (drillSidewaysCollectorManagers == null) {
+ // assert this.drillSidewaysFacetsCollectors.isEmpty();
+ this.drillSidewaysCollectorManagers = List.of();
+ } else {
+ // Ignore dimensions that don't exist in either one of the maps.
+ this.drillSidewaysCollectorManagers = new ArrayList<>(drillSidewaysCollectorManagers.size());
+ for (Map.Entry> entry :
+ drillSidewaysCollectorManagers.entrySet()) {
+ if (this.drillSidewaysFacetsCollectors.containsKey(entry.getKey())) {
+ // Record the list position of this dimension's manager before appending it.
+ dimToIndexMap.put(entry.getKey(), ind++);
+ this.drillSidewaysCollectorManagers.add(entry.getValue());
+ }
+ }
+ }
+ }
+
+ /**
+ * Creates a new PostCollectionFaceting instance without drill-sideways collectors.
+ *
+ * <p>Delegates to the five-argument constructor, passing null for both drill-sideways maps.
+ *
+ * @param drillDownCollectorManager collector manager for drill-down results
+ * @param drillDownFacetsCollector facets collector containing drill-down matching documents
+ * @param executor executor for parallel processing, or null for sequential execution
+ */
+ public PostCollectionFaceting(
+ CollectorManager drillDownCollectorManager,
+ FacetsCollector drillDownFacetsCollector,
+ Executor executor) {
+ this(drillDownCollectorManager, null, drillDownFacetsCollector, null, executor);
+ }
+
+ /**
+ * Computes how many leaf slots are needed to index matching docs by leaf ordinal.
+ *
+ * @return the largest {@code context().ord} found among the drill-down and all drill-sideways
+ * matching docs, plus one; 0 when no matching docs exist at all
+ */
+ private int calculateNumOfIndexLeafs() {
+ // Start below any valid ordinal so that "nothing seen" yields 0 after the final + 1.
+ int maxOrd = -1;
+ for (FacetsCollector.MatchingDocs matchingDocs : drillDownFacetsCollector.getMatchingDocs()) {
+ maxOrd = Math.max(maxOrd, matchingDocs.context().ord);
+ }
+ // Every drill-sideways FacetsCollector is scanned, including ones without a matching manager.
+ for (FacetsCollector facetsCollector : drillSidewaysFacetsCollectors.values()) {
+ for (FacetsCollector.MatchingDocs matchingDocs : facetsCollector.getMatchingDocs()) {
+ maxOrd = Math.max(maxOrd, matchingDocs.context().ord);
+ }
+ }
+ return maxOrd + 1;
+ }
+
+ /**
+ * Organizes matching documents into a 2D array indexed by leaf ordinal and dimension ordinal.
+ *
+ * <p>Column 0 holds the drill-down matching docs; the drill-sideways dimension {@code d} is
+ * stored in column {@code dimToIndexMap.get(d) + 1}. A cell stays null when the corresponding
+ * collector has no MatchingDocs for that leaf.
+ *
+ * <p>Example: For an index with 3 segments and 2 drill-sideways dimensions ("brand", "color"):
+ *
+ * <pre>{@code
+ * // dimToIndexMap: {"brand" -> 0, "color" -> 1}
+ * // Result array structure:
+ * result[0][0] = drill-down docs for segment 0
+ * result[0][1] = "brand" drill-sideways docs for segment 0
+ * result[0][2] = "color" drill-sideways docs for segment 0
+ * result[1][0] = drill-down docs for segment 1
+ * result[1][1] = "brand" drill-sideways docs for segment 1
+ * ...
+ * }</pre>
+ *
+ * <p>Note: {@link FacetsCollector#getMatchingDocs()} returns one MatchingDocs per visited
+ * segment, so the number of MatchingDocs is never greater than the number of index segments,
+ * even if intra-segment concurrency was used to collect data.
+ *
+ * @return matching docs indexed first by leaf ordinal, then by the column described above
+ */
+ private FacetsCollector.MatchingDocs[][] getPerLeafMatchingDocs() {
+ // One row per leaf ordinal; one column for drill-down plus one per drill-sideways dimension.
+ FacetsCollector.MatchingDocs[][] perLeafMatchingDocs =
+ new FacetsCollector.MatchingDocs[numOfIndexLeafs]
+ [drillSidewaysCollectorManagers.size() + 1];
+ for (FacetsCollector.MatchingDocs drillDownMatchingDocs :
+ drillDownFacetsCollector.getMatchingDocs()) {
+ perLeafMatchingDocs[drillDownMatchingDocs.context().ord][0] = drillDownMatchingDocs;
+ }
+ // Only dimensions registered in dimToIndexMap are placed; others are left out of the array.
+ for (Map.Entry entry : dimToIndexMap.entrySet()) {
+ for (FacetsCollector.MatchingDocs matchingDocs :
+ drillSidewaysFacetsCollectors.get(entry.getKey()).getMatchingDocs()) {
+ // Shift by one because column 0 is reserved for drill-down.
+ perLeafMatchingDocs[matchingDocs.context().ord][entry.getValue() + 1] = matchingDocs;
+ }
+ }
+ return perLeafMatchingDocs;
+ }
+
+ // A unit of work for one task: rows of the per-leaf matching docs array, one row per leaf,
+ // each row laid out as [drill-down, dimension 0, dimension 1, ...].
+ private record Slice(FacetsCollector.MatchingDocs[][] leafMatchingDocs) {}
+
+ /**
+ * Partitions matching documents into slices for parallel processing.
+ *
+ * <p>Slicing enables parallel facet computation by distributing work across multiple threads.
+ * Each slice contains a subset of index segments with enough documents to justify the overhead of
+ * parallel execution, improving throughput for large result sets.
+ *
+ * <p>Leaves are taken in ordinal order and a slice always covers a contiguous run of leaves. The
+ * size of a slice is the sum of {@code totalHits()} over every non-null cell (drill-down and
+ * all drill-sideways dimensions) of its leaves. Trailing leaves that never reach the threshold
+ * form one last, smaller slice, unless they contain no hits at all.
+ *
+ * @param minDocsPerSlice minimum number of documents per slice to balance parallelization
+ * overhead
+ * @param perLeafMatchingDocs matching documents organized by leaf ordinal and dimension ordinal
+ * @return list of slices, each containing a subset of segments for independent processing; empty
+ * when there are no hits
+ */
+ private List getSlices(
+ int minDocsPerSlice, FacetsCollector.MatchingDocs[][] perLeafMatchingDocs) {
+ List slices = new ArrayList<>();
+
+ // Hits accumulated for the slice currently being built.
+ int currentSliceSize = 0;
+ // Ordinal of the last leaf already assigned to a slice; -1 while no slice has been emitted.
+ int lastSliceEnd = -1;
+ for (int leafOrd = 0; leafOrd < perLeafMatchingDocs.length; leafOrd++) {
+ for (int dimOrd = 0; dimOrd < perLeafMatchingDocs[leafOrd].length; dimOrd++) {
+ if (perLeafMatchingDocs[leafOrd][dimOrd] != null) {
+ currentSliceSize += perLeafMatchingDocs[leafOrd][dimOrd].totalHits();
+ }
+ }
+ // Threshold reached: close the slice after this leaf and start counting afresh.
+ if (currentSliceSize >= minDocsPerSlice) {
+ slices.add(
+ new Slice(
+ ArrayUtil.copyOfSubArray(perLeafMatchingDocs, lastSliceEnd + 1, leafOrd + 1)));
+ currentSliceSize = 0;
+ lastSliceEnd = leafOrd;
+ }
+ }
+ // add final slice
+ if (currentSliceSize > 0) {
+ slices.add(
+ new Slice(
+ ArrayUtil.copyOfSubArray(
+ perLeafMatchingDocs, lastSliceEnd + 1, perLeafMatchingDocs.length)));
+ }
+ return slices;
+ }
+
+ /**
+ * Collects facet results by replaying documents through collectors in parallel slices.
+ *
+ * <p>For every slice a fresh collector is requested from the drill-down manager (when one was
+ * supplied) and from each drill-sideways manager. All slices are handed to the task executor,
+ * and afterwards each manager reduces its own per-slice collectors into one result. When there
+ * is nothing to replay, the managers are reduced over empty collector lists instead.
+ *
+ * @return result containing drill-down and drill-sideways facet results; the drill-down result
+ * is null when no drill-down collector manager was supplied
+ * @throws IOException if an I/O error occurs during collection
+ */
+ public Result collect() throws IOException {
+ FacetsCollector.MatchingDocs[][] perLeafMatchingDocs = getPerLeafMatchingDocs();
+ final List leafSlices = getSlices(MIN_DOCS_PER_SLICE, perLeafMatchingDocs);
+
+ if (leafSlices.size() == 0) {
+ return getEmptyResult();
+ } else {
+ // Per-slice drill-down collectors, or null when drill-down collection is disabled.
+ final List drillDownCollectors;
+ if (drillDownCollectorManager != null) {
+ drillDownCollectors = new ArrayList<>(leafSlices.size());
+ } else {
+ drillDownCollectors = null;
+ }
+ // Outer list: one entry per slice. Inner list: one collector per dimension, in
+ // drillSidewaysCollectorManagers order.
+ final List> drillSidewaysCollectors = new ArrayList<>(leafSlices.size());
+ final List> listTasks = new ArrayList<>(leafSlices.size());
+ for (int i = 0; i < leafSlices.size(); ++i) {
+ final Slice slice = leafSlices.get(i);
+ // drill down collector
+ final C drillDownCollector;
+ if (drillDownCollectorManager != null) {
+ drillDownCollector = drillDownCollectorManager.newCollector();
+ drillDownCollectors.add(drillDownCollector);
+ } else {
+ drillDownCollector = null;
+ }
+
+ // drill sideways collectors
+ List drillSidewaysSliceCollectors =
+ new ArrayList<>(drillSidewaysCollectorManagers.size());
+ for (CollectorManager manager : drillSidewaysCollectorManagers) {
+ drillSidewaysSliceCollectors.add(manager.newCollector());
+ }
+ drillSidewaysCollectors.add(drillSidewaysSliceCollectors);
+ listTasks.add(() -> collectSlice(slice, drillDownCollector, drillSidewaysSliceCollectors));
+ }
+ // The reduce calls below read the collectors, so all slice tasks must be done by the time
+ // invokeAll returns.
+ taskExecutor.invokeAll(listTasks);
+ Map drillSidewaysResults = new HashMap<>(drillSidewaysCollectorManagers.size());
+ for (Map.Entry entry : dimToIndexMap.entrySet()) {
+ // Pick this dimension's collector out of every slice.
+ List collectors =
+ drillSidewaysCollectors.stream()
+ .map(list -> list.get(dimToIndexMap.get(entry.getKey())))
+ .toList();
+ CollectorManager collectorManager =
+ drillSidewaysCollectorManagers.get(entry.getValue());
+ drillSidewaysResults.put(entry.getKey(), collectorManager.reduce(collectors));
+ }
+ T drillDownResult;
+ if (drillDownCollectorManager != null) {
+ drillDownResult = drillDownCollectorManager.reduce(drillDownCollectors);
+ } else {
+ drillDownResult = null;
+ }
+ return new Result<>(drillDownResult, drillSidewaysResults);
+ }
+ }
+
+ /**
+ * Builds the result returned when there are no slices to process.
+ *
+ * @return a result in which every registered dimension maps to its manager's reduction of an
+ * empty collector list, and the drill-down result is the drill-down manager's reduction of
+ * an empty list (or null when there is no drill-down manager)
+ * @throws IOException if a manager's reduce call fails
+ */
+ private Result getEmptyResult() throws IOException {
+ // there are no segments, nothing to offload to the executor, but we do need to call reduce to
+ // create some kind of empty result
+ Map emptyResults = new HashMap<>();
+ for (Map.Entry entry : dimToIndexMap.entrySet()) {
+ emptyResults.put(
+ entry.getKey(), drillSidewaysCollectorManagers.get(entry.getValue()).reduce(List.of()));
+ }
+ T drillDownResult = null;
+ if (drillDownCollectorManager != null) {
+ drillDownResult = drillDownCollectorManager.reduce(List.of());
+ }
+ return new Result<>(drillDownResult, emptyResults);
+ }
+
+ /**
+ * The result. It is very similar to DrillSideways.Result, but it uses Map instead of List for
+ * drill sideways results. See also to-do comment in DrillSidewaysFacetOrchestrator.
+ *
+ * @param <T> drill down result
+ * @param <R> drill sideways result
+ * @param drillDownResult the drill down result; null when no drill-down collector manager was
+ * supplied
+ * @param drillSidewaysResults the drill sideways results, keyed by dimension name
+ */
+ public record Result(T drillDownResult, Map drillSidewaysResults) {}
+
+ /**
+ * Scorable that serves scores out of a MatchingDocs score array. The replay loop sets the
+ * current doc ID before each collect call, and {@link #score()} returns the array entry at that
+ * doc ID.
+ */
+ private static class MatchingDocsScorable extends Scorable {
+ private final FacetsCollector.MatchingDocs matchingDocs;
+ // Doc ID whose score is returned by score(); -1 until setCurrentDocId() is first called.
+ private int currentDocId = -1;
+
+ MatchingDocsScorable(FacetsCollector.MatchingDocs matchingDocs) {
+ this.matchingDocs = matchingDocs;
+ }
+
+ // Positions this scorable on the document that is about to be collected.
+ void setCurrentDocId(int docId) {
+ this.currentDocId = docId;
+ }
+
+ @Override
+ public float score() throws IOException {
+ assert currentDocId >= 0 : "setCurrentDocId() must be called before score()";
+ assert matchingDocs.scores().length > currentDocId
+ : "scores array is indexed by doc ID (see FacetsCollector.MatchingDocs), so length must be greater"
+ + " than currentDocId";
+ return matchingDocs.scores()[currentDocId];
+ }
+ }
+
+ /**
+ * Null-tolerant lookup of a leaf collector.
+ *
+ * @param matchingDocs matching docs for one leaf and one column, or null if there are none
+ * @param collector the collector for that column, or null if it is not being collected
+ * @return the collector's leaf collector for the leaf of {@code matchingDocs}, or null when
+ * either argument is null
+ * @throws IOException if the collector fails to create the leaf collector
+ */
+ private static LeafCollector getLeafCollector(
+ FacetsCollector.MatchingDocs matchingDocs, Collector collector) throws IOException {
+ if (matchingDocs == null || collector == null) {
+ return null;
+ }
+ return collector.getLeafCollector(matchingDocs.context());
+ }
+
+ /**
+ * Creates a score source for a collector, but only if the collector asks for scores.
+ *
+ * @param matchingDocs matching docs for one leaf and one column, or null if there are none
+ * @param collector the collector for that column, or null if it is not being collected
+ * @return a scorable backed by {@code matchingDocs}, or null when either argument is null or the
+ * collector's score mode does not need scores
+ * @throws IllegalStateException if the collector needs scores but {@code matchingDocs} has no
+ * scores array
+ */
+ private static MatchingDocsScorable createScorer(
+ FacetsCollector.MatchingDocs matchingDocs, Collector collector) {
+ if (matchingDocs == null || collector == null) {
+ return null;
+ }
+ if (collector.scoreMode().needsScores()) {
+ if (matchingDocs.scores() == null) {
+ throw new IllegalStateException(
+ "Collector requires scores, but FacetCollector doesn't have them.");
+ } else {
+ return new MatchingDocsScorable(matchingDocs);
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Replays all leaves of one slice through the given collectors.
+ *
+ * <p>For each leaf, index 0 of the working arrays belongs to drill-down and index {@code i + 1}
+ * to the i-th drill-sideways collector, mirroring the layout of the slice rows.
+ *
+ * @param slice the leaves to replay
+ * @param drillDownCollector collector for drill-down docs, or null to skip them
+ * @param drillSidewaysCollectors one collector per drill-sideways dimension, in dimension-index
+ * order
+ * @return always null; the Void return type lets the method be used as a Callable task
+ * @throws IOException if a collector fails
+ */
+ private Void collectSlice(Slice slice, C drillDownCollector, List drillSidewaysCollectors)
+ throws IOException {
+ // Reused for every leaf; each slot is reassigned (possibly to null) at the start of a leaf.
+ LeafCollector[] leafCollectors = new LeafCollector[drillSidewaysCollectors.size() + 1];
+ // Init lazily as it is not often needed.
+ MatchingDocsScorable[] scorables = null;
+ for (FacetsCollector.MatchingDocs[] leafMatchingDocs : slice.leafMatchingDocs()) {
+ leafCollectors[0] = getLeafCollector(leafMatchingDocs[0], drillDownCollector);
+ // TODO dedup scorer code for drill down and sideways
+ MatchingDocsScorable scorer = createScorer(leafMatchingDocs[0], drillDownCollector);
+ // A non-null scorer implies non-null matching docs and collector, so leafCollectors[0] was
+ // obtained from the collector above.
+ if (scorer != null) {
+ if (scorables == null) {
+ scorables = new MatchingDocsScorable[leafCollectors.length];
+ }
+ scorables[0] = scorer;
+ leafCollectors[0].setScorer(scorer);
+ }
+
+ for (int i = 0; i < drillSidewaysCollectors.size(); i++) {
+ leafCollectors[i + 1] =
+ getLeafCollector(leafMatchingDocs[i + 1], drillSidewaysCollectors.get(i));
+ scorer = createScorer(leafMatchingDocs[i + 1], drillSidewaysCollectors.get(i));
+ if (scorer != null) {
+ if (scorables == null) {
+ scorables = new MatchingDocsScorable[leafCollectors.length];
+ }
+ scorables[i + 1] = scorer;
+ leafCollectors[i + 1].setScorer(scorer);
+ }
+ }
+ collectLeaf(leafMatchingDocs, leafCollectors, scorables);
+ }
+ return null;
+ }
+
+ /**
+ * Replays the matching docs of a single leaf through the leaf collectors, visiting doc IDs in
+ * increasing order across all columns.
+ *
+ * <p>All three arrays share the same indexing: index 0 is drill-down, the rest are drill-sideways
+ * dimensions. A column is skipped when its matching docs or its leaf collector is null. When
+ * several columns contain the same doc ID, that doc is collected for each of them, in column
+ * order, before any larger doc ID is collected. Every non-null leaf collector is finished at the
+ * end, even if it received no documents.
+ *
+ * @param matchingDocs per-column matching docs for this leaf; entries may be null
+ * @param leafCollectors per-column leaf collectors; entries may be null
+ * @param scorables per-column score sources, or null when no column needs scores; entries may be
+ * null
+ * @throws IOException if iterating the doc IDs or collecting fails
+ */
+ private static void collectLeaf(
+ FacetsCollector.MatchingDocs[] matchingDocs,
+ LeafCollector[] leafCollectors,
+ MatchingDocsScorable[] scorables)
+ throws IOException {
+ assert matchingDocs.length == leafCollectors.length;
+ // init
+ // Smallest first doc ID over all active columns; NO_MORE_DOCS if there is nothing to collect.
+ int currentDocToCollect = NO_MORE_DOCS;
+ // TODO: can move iterators out of this method, pass instead of matchingDocs?
+ DocIdSetIterator[] iterators = new DocIdSetIterator[matchingDocs.length];
+ for (int i = 0; i < matchingDocs.length; i++) {
+ if (matchingDocs[i] != null && leafCollectors[i] != null) {
+ iterators[i] = matchingDocs[i].bits().iterator();
+ int firstDoc = iterators[i].nextDoc();
+ if (firstDoc != NO_MORE_DOCS
+ && (currentDocToCollect == NO_MORE_DOCS || currentDocToCollect > firstDoc)) {
+ currentDocToCollect = firstDoc;
+ }
+ }
+ }
+ // collection
+ int nextDocToCollect;
+ while (currentDocToCollect < NO_MORE_DOCS) {
+ // Same sentinel as the loop condition: if no iterator offers a further doc, the loop ends.
+ nextDocToCollect = NO_MORE_DOCS;
+ for (int i = 0; i < iterators.length; i++) {
+ if (iterators[i] == null) {
+ continue;
+ }
+ if (iterators[i].docID() == currentDocToCollect) {
+ assert leafCollectors[i] != null
+ : "leafCollectors[" + i + "] is null but the iterator is not null";
+ // Point the score source at this doc before the collector can ask for its score.
+ if (scorables != null && scorables[i] != null) {
+ scorables[i].setCurrentDocId(currentDocToCollect);
+ }
+ leafCollectors[i].collect(currentDocToCollect);
+ int nextDoc = iterators[i].nextDoc();
+ if (nextDoc == NO_MORE_DOCS) {
+ // Exhausted: drop the iterator so later rounds skip this column.
+ iterators[i] = null;
+ } else if (nextDocToCollect > nextDoc) {
+ nextDocToCollect = nextDoc;
+ }
+ } else {
+ // An iterator that is not on the current doc must be waiting on a later one.
+ assert iterators[i].docID() > currentDocToCollect
+ : "iterators[i].docID() ("
+ + iterators[i].docID()
+ + ") should always be greater than currentDocToCollect ("
+ + currentDocToCollect
+ + ")";
+
+ if (nextDocToCollect > iterators[i].docID()) {
+ nextDocToCollect = iterators[i].docID();
+ }
+ }
+ }
+ assert nextDocToCollect > currentDocToCollect;
+ currentDocToCollect = nextDocToCollect;
+ }
+ // finish
+ for (LeafCollector leafCollector : leafCollectors) {
+ if (leafCollector != null) {
+ leafCollector.finish();
+ }
+ }
+ }
+}
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/utils/TestPostCollectionFaceting.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/utils/TestPostCollectionFaceting.java
new file mode 100644
index 000000000000..cb290f0d8934
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/facet/utils/TestPostCollectionFaceting.java
@@ -0,0 +1,593 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.sandbox.facet.utils;
+
+import static java.util.concurrent.Executors.newFixedThreadPool;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.FacetsCollector;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.Scorable;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.NamedThreadFactory;
+
+public class TestPostCollectionFaceting extends LuceneTestCase {
+
+ // Readers and directories that tearDown() closes. Presumably filled by the index-building
+ // helpers, which are not visible in this part of the file.
+ private final List testReaders = new ArrayList<>();
+ private final List testDirs = new ArrayList<>();
+
+ /** Closes every tracked reader, then every tracked directory, before the superclass teardown. */
+ @Override
+ public void tearDown() throws Exception {
+ for (DirectoryReader reader : testReaders) {
+ reader.close();
+ }
+ for (Directory dir : testDirs) {
+ dir.close();
+ }
+ super.tearDown();
+ }
+
+ /**
+ * Two leaves, a drill-down collector and two drill-sideways dimensions, run without an executor.
+ * Checks the doc count and the set of doc IDs reported for drill-down and for each dimension.
+ */
+ public void testBasic() throws IOException {
+ List contexts = createLeafContexts(2);
+ LeafReaderContext ctx0 = contexts.get(0);
+ LeafReaderContext ctx1 = contexts.get(1);
+
+ // The trailing int arguments are presumably the matching doc IDs for the given leaf; the
+ // expected sets asserted below are the unions of these values.
+ TestFacetsCollector drillDownFacets = createFacetsCollector(ctx0, 2, 3);
+ addMatchingDocs(drillDownFacets, ctx1, 5);
+
+ TestFacetsCollector dim1Facets = createFacetsCollector(ctx0, 1, 2, 6);
+ addMatchingDocs(dim1Facets, ctx1, 4, 7);
+
+ TestFacetsCollector dim2Facets = createFacetsCollector(ctx0, 2, 3, 8);
+ addMatchingDocs(dim2Facets, ctx1, 5, 9);
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", dim1Facets);
+ drillSidewaysFacets.put("dim2", dim2Facets);
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+ drillSidewaysManagers.put("dim2", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+
+ assertNotNull(result.drillDownResult());
+ assertEquals(3, result.drillDownResult().totalDocs);
+ assertEquals(Set.of(2, 3, 5), result.drillDownResult().allDocIds);
+
+ assertNotNull(result.drillSidewaysResults());
+ assertEquals(2, result.drillSidewaysResults().size());
+
+ DocCountResult dim1Result = result.drillSidewaysResults().get("dim1");
+ assertNotNull(dim1Result);
+ assertEquals(5, dim1Result.totalDocs);
+ assertEquals(Set.of(1, 2, 6, 4, 7), dim1Result.allDocIds);
+
+ DocCountResult dim2Result = result.drillSidewaysResults().get("dim2");
+ assertNotNull(dim2Result);
+ assertEquals(5, dim2Result.totalDocs);
+ assertEquals(Set.of(2, 3, 8, 5, 9), dim2Result.allDocIds);
+ }
+
+ /**
+ * FacetsCollectors with no matching docs: collect() must still return non-null, zero-count
+ * results for drill-down and for the registered dimension.
+ */
+ public void testEmptySlices() throws IOException {
+ TestFacetsCollector drillDownFacets = new TestFacetsCollector();
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", new TestFacetsCollector());
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+
+ assertNotNull(result.drillDownResult());
+ assertEquals(0, result.drillDownResult().totalDocs);
+ assertEquals(1, result.drillSidewaysResults().size());
+ assertEquals(0, result.drillSidewaysResults().get("dim1").totalDocs);
+ }
+
+ /**
+ * Score-consuming collectors over FacetsCollectors that carry scores. The expected values (3.0
+ * for scores 1.0 and 2.0, 1.5 for a single 1.5) suggest ScoreCollectorManager sums the scores it
+ * sees; its implementation is not visible in this part of the file.
+ */
+ public void testScoreCollector() throws IOException {
+ List contexts = createLeafContexts(1);
+ TestFacetsCollector drillDownFacets =
+ createFacetsCollectorWithScores(
+ contexts.get(0), new int[] {0, 1}, new float[] {1.0f, 2.0f});
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put(
+ "dim1",
+ createFacetsCollectorWithScores(contexts.get(0), new int[] {0}, new float[] {1.5f}));
+
+ ScoreCollectorManager drillDownManager = new ScoreCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new ScoreCollectorManager());
+
+ PostCollectionFaceting faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager, drillSidewaysManagers, drillDownFacets, drillSidewaysFacets, null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(3.0f, result.drillDownResult(), 0.001f);
+ assertEquals(1.5f, result.drillSidewaysResults().get("dim1"), 0.001f);
+ }
+
+ /**
+ * Score-consuming collectors over FacetsCollectors built without scores: collect() is expected
+ * to fail with an IllegalStateException whose message mentions "Collector requires scores".
+ */
+ public void testScoreCollectorWithoutScores() throws IOException {
+ List contexts = createLeafContexts(1);
+ TestFacetsCollector drillDownFacets = createFacetsCollector(contexts.get(0), 1, 2);
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", createFacetsCollector(contexts.get(0), 1));
+
+ ScoreCollectorManager drillDownManager = new ScoreCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new ScoreCollectorManager());
+
+ PostCollectionFaceting faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager, drillSidewaysManagers, drillDownFacets, drillSidewaysFacets, null);
+
+ try {
+ faceting.collect();
+ fail("Expected IllegalStateException");
+ } catch (IllegalStateException e) {
+ assertTrue(e.getMessage().contains("Collector requires scores"));
+ }
+ }
+
+ /**
+ * Three leaves with MIN_DOCS_PER_SLICE drill-down docs and twice as many "dim1" docs in total
+ * (per the asserted totals). Only the reduced totals are asserted; the number of slices actually
+ * formed is not checked here.
+ */
+ public void testMultipleSlices() throws IOException {
+ List contexts = createLeafContexts(3);
+
+ TestFacetsCollector drillDownFacets =
+ createLargeFacetsCollector(contexts, PostCollectionFaceting.MIN_DOCS_PER_SLICE);
+ TestFacetsCollector dim1Facets =
+ createLargeFacetsCollector(contexts, PostCollectionFaceting.MIN_DOCS_PER_SLICE * 2);
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", dim1Facets);
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(PostCollectionFaceting.MIN_DOCS_PER_SLICE, result.drillDownResult().totalDocs);
+ assertEquals(
+ PostCollectionFaceting.MIN_DOCS_PER_SLICE * 2,
+ result.drillSidewaysResults().get("dim1").totalDocs);
+ }
+
+ /**
+ * Uses the three-argument constructor (no drill-sideways input): drill-down results are
+ * produced and the drill-sideways result map is non-null but empty.
+ */
+ public void testNullDrillSideways() throws IOException {
+ List contexts = createLeafContexts(1);
+ TestFacetsCollector drillDownFacets = createFacetsCollector(contexts.get(0), 1, 2);
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+
+ PostCollectionFaceting
+ faceting = new PostCollectionFaceting<>(drillDownManager, drillDownFacets, null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(2, result.drillDownResult().totalDocs);
+ assertEquals(Set.of(1, 2), result.drillDownResult().allDocIds);
+ assertNotNull(result.drillSidewaysResults());
+ assertEquals(0, result.drillSidewaysResults().size());
+ }
+
+ /**
+ * Passes null as the drill-down collector manager: the drill-down result must be null while the
+ * drill-sideways dimension is still collected.
+ */
+ public void testNullDrillDownManager() throws IOException {
+ List contexts = createLeafContexts(1);
+ TestFacetsCollector drillDownFacets = createFacetsCollector(contexts.get(0), 1, 2);
+ TestFacetsCollector dim1Facets = createFacetsCollector(contexts.get(0), 1);
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", dim1Facets);
+
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ null, drillSidewaysManagers, drillDownFacets, drillSidewaysFacets, null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNull(result.drillDownResult());
+ assertNotNull(result.drillSidewaysResults());
+ assertEquals(1, result.drillSidewaysResults().size());
+ assertEquals(1, result.drillSidewaysResults().get("dim1").totalDocs);
+ }
+
+ /**
+ * A drill-sideways dimension that has matching docs for only one of the two leaves: the leaf
+ * without docs for that dimension must be skipped without error.
+ */
+ public void testNullMatchingDocs() throws IOException {
+ List contexts = createLeafContexts(2);
+ LeafReaderContext ctx0 = contexts.get(0);
+ LeafReaderContext ctx1 = contexts.get(1);
+
+ // Create drill down facets with docs in both contexts
+ TestFacetsCollector drillDownFacets = createFacetsCollector(ctx0, 1, 2);
+ addMatchingDocs(drillDownFacets, ctx1, 3);
+
+ // Create drill sideways facets with docs only in ctx0, leaving ctx1 with null matching docs
+ TestFacetsCollector dim1Facets = createFacetsCollector(ctx0, 1);
+ // Intentionally not adding matching docs for ctx1 to create null scenario
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", dim1Facets);
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(3, result.drillDownResult().totalDocs);
+ assertEquals(1, result.drillSidewaysResults().get("dim1").totalDocs);
+ }
+
+ /**
+ * The two drill-sideways maps disagree on their dimensions. In both directions, only the
+ * dimension present in both maps ("dim1") may appear in the result.
+ */
+ public void testMismatchedDimensionMaps() throws IOException {
+ List contexts = createLeafContexts(1);
+
+ // Scenario 1: FacetsCollector is missing for a dimension
+ TestFacetsCollector drillDownFacets = createFacetsCollector(contexts.get(0), 1, 2);
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", createFacetsCollector(contexts.get(0), 1));
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+ drillSidewaysManagers.put("dim2", new DocCountCollectorManager());
+
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ null);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(2, result.drillDownResult().totalDocs);
+ assertEquals(1, result.drillSidewaysResults().size());
+ assertTrue(result.drillSidewaysResults().containsKey("dim1"));
+ assertFalse(result.drillSidewaysResults().containsKey("dim2"));
+
+ // Scenario 2: collector manager is missing for a dimension
+ drillDownFacets = createFacetsCollector(contexts.get(0), 1, 2);
+
+ drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", createFacetsCollector(contexts.get(0), 1));
+ drillSidewaysFacets.put("dim2", createFacetsCollector(contexts.get(0), 1, 2));
+
+ drillDownManager = new DocCountCollectorManager();
+ drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager, drillSidewaysManagers, drillDownFacets, drillSidewaysFacets, null);
+
+ result = faceting.collect();
+ assertNotNull(result.drillDownResult());
+ assertEquals(2, result.drillDownResult().totalDocs);
+ assertEquals(1, result.drillSidewaysResults().size());
+ assertTrue(result.drillSidewaysResults().containsKey("dim1"));
+ assertFalse(result.drillSidewaysResults().containsKey("dim2"));
+ }
+
+ /**
+ * Runs collection on a three-thread pool over four leaves of randomly generated doc IDs and
+ * checks that the reduced doc counts and doc ID sets match what the generator reported.
+ */
+ public void testRandomConcurrentExecution() throws IOException {
+ List contexts = createLeafContexts(4);
+
+ // Ensure enough docs to create 3 slices (threshold is MIN_DOCS_PER_SLICE)
+ // NOTE(review): slices are cut on leaf boundaries and count drill-down plus drill-sideways
+ // hits, so four leaves of about 85 + 5 docs each look like they yield 2 slices rather than 3.
+ // The assertions below do not depend on the slice count; confirm and adjust if 3 is intended.
+ int docsPerLeaf = PostCollectionFaceting.MIN_DOCS_PER_SLICE * 3 / contexts.size() + 10;
+
+ var drillDownResult = createRandomFacetsCollector(contexts, docsPerLeaf);
+ TestFacetsCollector drillDownFacets = drillDownResult.collector();
+ Set expectedDrillDownDocs = drillDownResult.allDocIds();
+
+ var dim1Result = createRandomFacetsCollector(contexts, 5);
+ TestFacetsCollector dim1Facets = dim1Result.collector();
+ Set expectedDim1Docs = dim1Result.allDocIds();
+
+ Map drillSidewaysFacets = new HashMap<>();
+ drillSidewaysFacets.put("dim1", dim1Facets);
+
+ DocCountCollectorManager drillDownManager = new DocCountCollectorManager();
+ Map drillSidewaysManagers = new HashMap<>();
+ drillSidewaysManagers.put("dim1", new DocCountCollectorManager());
+
+ // Use multi-threaded executor
+ ExecutorService executor =
+ newFixedThreadPool(3, new NamedThreadFactory("TestPostCollectionFaceting"));
+ try {
+ PostCollectionFaceting
+ faceting =
+ new PostCollectionFaceting<>(
+ drillDownManager,
+ drillSidewaysManagers,
+ drillDownFacets,
+ drillSidewaysFacets,
+ executor);
+
+ PostCollectionFaceting.Result result = faceting.collect();
+
+ // Assert exact doc IDs match
+ assertEquals(expectedDrillDownDocs.size(), result.drillDownResult().totalDocs);
+ assertEquals(expectedDrillDownDocs, result.drillDownResult().allDocIds);
+
+ assertEquals(expectedDim1Docs.size(), result.drillSidewaysResults().get("dim1").totalDocs);
+ assertEquals(expectedDim1Docs, result.drillSidewaysResults().get("dim1").allDocIds);
+ } finally {
+ // Always release the pool threads, even when an assertion above fails.
+ executor.shutdown();
+ }
+ }
+
+ // Helper methods
+
+  /**
+   * A test collector paired with the doc IDs that were added to it.
+   *
+   * @param collector collector pre-populated with matching docs
+   * @param allDocIds every doc ID that was added to {@code collector}
+   */
+  private record FacetsCollectorData(TestFacetsCollector collector, Set<Integer> allDocIds) {}
+
+  /**
+   * Creates a collector holding {@code docsPerLeaf} random doc IDs for each of the given leaves.
+   *
+   * <p>IDs are drawn uniformly from {@code [0, Integer.MAX_VALUE)} and are unique across all
+   * leaves, so the returned set has one entry per generated doc.
+   *
+   * @param contexts leaves to generate matches for
+   * @param docsPerLeaf number of distinct doc IDs per leaf; values below 1 produce empty leaves
+   * @return the populated collector together with the set of all generated doc IDs
+   */
+  private FacetsCollectorData createRandomFacetsCollector(
+      List<LeafReaderContext> contexts, int docsPerLeaf) {
+    Set<Integer> allDocIds = new HashSet<>();
+    TestFacetsCollector collector = new TestFacetsCollector();
+    int perLeaf = Math.max(docsPerLeaf, 0);
+
+    for (LeafReaderContext context : contexts) {
+      int[] docIds = new int[perLeaf];
+      int filled = 0;
+      while (filled < perLeaf) {
+        // The bounded draw never yields a negative value or Integer.MAX_VALUE, so only
+        // duplicates need to be rejected.
+        int candidate = random().nextInt(Integer.MAX_VALUE);
+        // Set.add returns false for an ID already handed out to this or an earlier leaf.
+        if (allDocIds.add(candidate)) {
+          docIds[filled++] = candidate;
+        }
+      }
+      addMatchingDocs(collector, context, docIds);
+    }
+
+    return new FacetsCollectorData(collector, allDocIds);
+  }
+
+  /**
+   * Builds a fresh collector with a single matching-docs entry for {@code context}, carrying the
+   * given doc IDs and their scores.
+   */
+  private TestFacetsCollector createFacetsCollectorWithScores(
+      LeafReaderContext context, int[] docIds, float[] scores) {
+    TestFacetsCollector scoredCollector = new TestFacetsCollector();
+    addMatchingDocs(scoredCollector, context, docIds, scores);
+    return scoredCollector;
+  }
+
+  /**
+   * Creates a collector with {@code totalDocs} matches spread as evenly as possible over the
+   * given leaves.
+   *
+   * <p>Every leaf receives {@code totalDocs / contexts.size()} docs and the first {@code totalDocs
+   * % contexts.size()} leaves receive one extra. Within a leaf the doc IDs are {@code 0..n-1}.
+   *
+   * @param contexts leaves to distribute the docs over; must not be empty
+   * @param totalDocs total number of matching docs across all leaves
+   * @return the populated collector
+   */
+  private TestFacetsCollector createLargeFacetsCollector(
+      List<LeafReaderContext> contexts, int totalDocs) {
+    TestFacetsCollector fc = new TestFacetsCollector();
+    int leafCount = contexts.size();
+    int docsPerContext = totalDocs / leafCount;
+    int remainder = totalDocs % leafCount;
+
+    for (int i = 0; i < leafCount; i++) {
+      // The first `remainder` leaves absorb the docs that do not divide evenly.
+      int docsInThisContext = i < remainder ? docsPerContext + 1 : docsPerContext;
+      int[] docIds = new int[docsInThisContext];
+      Arrays.setAll(docIds, j -> j);
+      addMatchingDocs(fc, contexts.get(i), docIds);
+    }
+    return fc;
+  }
+
+  /**
+   * Builds a throwaway index with exactly {@code count} leaves and returns their contexts.
+   *
+   * <p>One empty document is added and committed per iteration with merging disabled, and the
+   * resulting leaf count is asserted to equal {@code count}. The directory and the reader are
+   * added to {@code testDirs} / {@code testReaders} as soon as they exist, so they stay tracked
+   * even if a later step or the assertion fails.
+   *
+   * @param count number of leaves to create
+   * @return the leaves of the newly opened reader
+   * @throws IOException if writing or opening the index fails
+   */
+  private List<LeafReaderContext> createLeafContexts(int count) throws IOException {
+    Directory dir = newDirectory();
+    // Register before doing any work so a failure below cannot leave the directory untracked.
+    testDirs.add(dir);
+
+    // try-with-resources closes the writer even when indexing throws.
+    try (IndexWriter writer =
+        new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
+      for (int i = 0; i < count; i++) {
+        writer.addDocument(new Document());
+        // Committing after every document, with merges disabled, keeps each one in its own leaf.
+        writer.commit();
+      }
+    }
+
+    DirectoryReader reader = DirectoryReader.open(dir);
+    // Register before asserting so a leaf-count mismatch does not leave the reader untracked.
+    testReaders.add(reader);
+
+    List<LeafReaderContext> contexts = reader.leaves();
+    assertEquals(count, contexts.size());
+    return contexts;
+  }
+
+  /** Builds a fresh collector with one score-less matching-docs entry for {@code context}. */
+  private TestFacetsCollector createFacetsCollector(LeafReaderContext context, int... docIds) {
+    TestFacetsCollector collector = new TestFacetsCollector();
+    // No scores are recorded for these matches.
+    addMatchingDocs(collector, context, docIds, null);
+    return collector;
+  }
+
+ private void addMatchingDocs(TestFacetsCollector fc, LeafReaderContext context, int... docIds) {
+ addMatchingDocs(fc, context, docIds, null);
+ }
+
+  /**
+   * Adds one matching-docs entry for {@code context} to {@code fc}.
+   *
+   * <p>The doc-ID set is built from {@code docIds}; {@code docIds.length} is passed as the hit
+   * count and {@code scores} is passed through unchanged (it may be {@code null}).
+   */
+  private void addMatchingDocs(
+      TestFacetsCollector fc, LeafReaderContext context, int[] docIds, float[] scores) {
+    // The largest doc ID decides the builder size; an empty array falls back to 0.
+    int highestDoc = docIds.length == 0 ? 0 : docIds[0];
+    for (int id : docIds) {
+      highestDoc = Math.max(highestDoc, id);
+    }
+
+    DocIdSetBuilder docSetBuilder = new DocIdSetBuilder(highestDoc + 1);
+    DocIdSetBuilder.BulkAdder bulk = docSetBuilder.grow(docIds.length);
+    for (int id : docIds) {
+      bulk.add(id);
+    }
+
+    fc.addMatchingDocs(
+        new FacetsCollector.MatchingDocs(context, docSetBuilder.build(), docIds.length, scores));
+  }
+
+ // Test helper classes
+
+  /**
+   * Collector that counts every {@link #collect(int)} call and remembers the doc IDs it was given.
+   * Scores are not requested.
+   */
+  private static final class DocCountCollector extends SimpleCollector {
+    private int docCount = 0;
+    private final Set<Integer> allDocIds = new HashSet<>();
+
+    @Override
+    public void collect(int doc) {
+      docCount++;
+      allDocIds.add(doc);
+    }
+
+    @Override
+    public ScoreMode scoreMode() {
+      return ScoreMode.COMPLETE_NO_SCORES;
+    }
+
+    /** Returns the number of {@code collect} calls seen so far. */
+    public int getDocCount() {
+      return docCount;
+    }
+
+    /** Returns the distinct doc IDs seen so far (the live set, not a copy). */
+    public Set<Integer> getAllDocIds() {
+      return allDocIds;
+    }
+  }
+
+  /**
+   * Reduced output of {@code DocCountCollectorManager}.
+   *
+   * @param totalDocs sum of the doc counts of all reduced collectors
+   * @param allDocIds union of the doc IDs seen by all reduced collectors
+   */
+  private record DocCountResult(int totalDocs, Set<Integer> allDocIds) {}
+
+ private static final class DocCountCollectorManager
+ implements CollectorManager {
+ @Override
+ public DocCountCollector newCollector() {
+ return new DocCountCollector();
+ }
+
+ @Override
+ public DocCountResult reduce(Collection collectors) {
+ int total = 0;
+ Set allDocs = new HashSet<>();
+ for (DocCountCollector c : collectors) {
+ total += c.getDocCount();
+ allDocs.addAll(c.getAllDocIds());
+ }
+ return new DocCountResult(total, allDocs);
+ }
+ }
+
+  /**
+   * {@link FacetsCollector} whose matching docs are supplied directly by the test:
+   * {@link #getMatchingDocs()} returns the entries added via {@link #addMatchingDocs}.
+   */
+  private static final class TestFacetsCollector extends FacetsCollector {
+    private final List<MatchingDocs> testMatchingDocs = new ArrayList<>();
+
+    /** Appends a hand-built matching-docs entry. */
+    void addMatchingDocs(MatchingDocs docs) {
+      testMatchingDocs.add(docs);
+    }
+
+    @Override
+    public List<MatchingDocs> getMatchingDocs() {
+      return testMatchingDocs;
+    }
+  }
+
+ private static final class ScoreCollector extends SimpleCollector {
+ private float totalScore = 0;
+ private Scorable scorer;
+
+ @Override
+ public void collect(int doc) throws IOException {
+ totalScore += scorer.score();
+ }
+
+ @Override
+ public ScoreMode scoreMode() {
+ return ScoreMode.COMPLETE;
+ }
+
+ @Override
+ public void setScorer(Scorable scorer) {
+ this.scorer = scorer;
+ }
+
+ public float getTotalScore() {
+ return totalScore;
+ }
+ }
+
+ private static final class ScoreCollectorManager
+ implements CollectorManager {
+ @Override
+ public ScoreCollector newCollector() {
+ return new ScoreCollector();
+ }
+
+ @Override
+ public Float reduce(Collection collectors) {
+ float total = 0;
+ for (ScoreCollector c : collectors) {
+ total += c.getTotalScore();
+ }
+ return total;
+ }
+ }
+}