apache · jainankitk · Apr 26, 2025 · Apr 26, 2025
diff --git a/...sandbox/src/java/org/apache/lucene/sandbox/facet/plain/histograms/HistogramCollector.java b/...sandbox/src/java/org/apache/lucene/sandbox/facet/plain/histograms/HistogramCollector.java
@@ -64,12 +64,19 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
       throw new CollectionTerminatedException();
     }
 
+    boolean docValuesIndexed =
+        fi.getDocValuesType() == DocValuesType.NUMERIC
+            || fi.getDocValuesType() == DocValuesType.SORTED_NUMERIC;
+
     // We can use multi range traversal logic to collect the histogram on numeric
     // field indexed as point for MATCH_ALL cases. In future, this can be extended
     // for Point Range Query cases as well
     if (weight != null && weight.count(context) == context.reader().maxDoc()) {
       final PointValues pointValues = context.reader().getPointValues(field);
-      if (PointTreeBulkCollector.canCollectEfficiently(pointValues, bucketWidth)) {
+      // Without doc values there is no fallback, so collect even when it is not efficient
+      if (PointTreeBulkCollector.canCollect(pointValues)
+          && (docValuesIndexed == false
+              || PointTreeBulkCollector.canCollectEfficiently(pointValues, bucketWidth))) {
         // In case of intra segment concurrency, only one collector should collect
         // documents for all the partitions to avoid duplications across collectors
         if (leafBulkCollected.putIfAbsent(context, true) == null) {
@@ -81,8 +88,7 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept
       }
     }
 
-    if (fi.getDocValuesType() != DocValuesType.NUMERIC
-        && fi.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
+    if (docValuesIndexed == false) {
       throw new IllegalStateException(
           "Expected numeric field, but got doc-value type: " + fi.getDocValuesType());
     }

diff --git a/...box/src/java/org/apache/lucene/sandbox/facet/plain/histograms/PointTreeBulkCollector.java b/...box/src/java/org/apache/lucene/sandbox/facet/plain/histograms/PointTreeBulkCollector.java
@@ -45,21 +45,22 @@ private static Function<byte[], Long> bytesToLong(int numBytes) {
     return null;
   }
 
-  static boolean canCollectEfficiently(final PointValues pointValues, final long bucketWidth)
-      throws IOException {
+  static boolean canCollect(final PointValues pointValues) throws IOException {
     // We need pointValues.getDocCount() == pointValues.size() to count each doc only
     // once, including docs that have two values that fall into the same bucket.
     if (pointValues == null
         || pointValues.getNumDimensions() != 1
-        || pointValues.getDocCount() != pointValues.size()) {
+        || pointValues.getDocCount() != pointValues.size()
+        || bytesToLong(pointValues.getBytesPerDimension()) == null) {
       return false;
     }
 
-    final Function<byte[], Long> byteToLong = bytesToLong(pointValues.getBytesPerDimension());
-    if (byteToLong == null) {
-      return false;
-    }
+    return true;
+  }
 
+  static boolean canCollectEfficiently(final PointValues pointValues, final long bucketWidth)
+      throws IOException {
+    final Function<byte[], Long> byteToLong = bytesToLong(pointValues.getBytesPerDimension());
     long leafMinBucket =
         Math.floorDiv(byteToLong.apply(pointValues.getMinPackedValue()), bucketWidth);
     long leafMaxBucket =

diff --git a/.../test/org/apache/lucene/sandbox/facet/plain/histograms/TestHistogramCollectorManager.java b/.../test/org/apache/lucene/sandbox/facet/plain/histograms/TestHistogramCollectorManager.java
@@ -136,6 +136,8 @@ public void testMultiRangePointTreeCollector() throws IOException {
     DirectoryReader reader = DirectoryReader.open(w);
     w.close();
     IndexSearcher searcher = newSearcher(reader);
+
+    // MATCH_ALL case that can be collected efficiently
     LongIntHashMap actualCounts =
         searcher.search(new MatchAllDocsQuery(), new HistogramCollectorManager("f", 1000));
     LongIntHashMap expectedCounts = new LongIntHashMap();
@@ -144,6 +146,15 @@ public void testMultiRangePointTreeCollector() throws IOException {
     }
     assertEquals(expectedCounts, actualCounts);
 
+    // MATCH_ALL case that is collected despite inefficiency because doc values are not indexed
+    actualCounts =
+        searcher.search(new MatchAllDocsQuery(), new HistogramCollectorManager("f", 100));
+    expectedCounts = new LongIntHashMap();
+    for (long value : values) {
+      expectedCounts.addTo(Math.floorDiv(value, 100), 1);
+    }
+    assertEquals(expectedCounts, actualCounts);
+
     reader.close();
     dir.close();
   }