
Commit 5ff146d

Author: Sean Sullivan
Commit message: fix category spread, timeout, and epoch_millis time index
Parent: c4f334f

2 files changed: +24, -37 lines


elastic_datashader/elastic.py

Lines changed: 2 additions & 2 deletions
@@ -216,7 +216,7 @@ def get_search_base(
     time_range = None

     if timestamp_field:
-        time_range = {timestamp_field: {}}
+        time_range = {timestamp_field: {"format": "strict_date_optional_time_nanos"}}
         if start_time is not None:
             time_range[timestamp_field]["gte"] = start_time
         if stop_time is not None:
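
Context for the epoch_millis part of the fix: without an explicit format, Elasticsearch parses range bounds using the field mapping's own date format, so an index mapped as epoch_millis misreads or rejects ISO-8601 bounds. Pinning the format makes the bounds self-describing. A minimal sketch of the resulting filter; the field name and bounds are hypothetical, only the "format" key comes from the commit:

# Hypothetical field and bounds, for illustration only
timestamp_field = "@timestamp"
start_time = "2023-01-01T00:00:00.000Z"
stop_time = "2023-01-01T06:00:00.000Z"

time_range = {timestamp_field: {"format": "strict_date_optional_time_nanos"}}
if start_time is not None:
    time_range[timestamp_field]["gte"] = start_time
if stop_time is not None:
    time_range[timestamp_field]["lte"] = stop_time

# Yields a range clause that tells Elasticsearch how to parse the bounds,
# regardless of the index mapping's date format (e.g. epoch_millis):
# {"range": {"@timestamp": {"format": "strict_date_optional_time_nanos",
#                           "gte": "...", "lte": "..."}}}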
@@ -680,7 +680,7 @@ def run_search(**kwargs):
    s = self.search[:0]

    if _timeout_at:
-        _time_remaining = _timeout_at - time.time()
+        _time_remaining = int(_timeout_at - time.time())
        s = s.params(timeout=f"{_time_remaining}s")

    s.aggs.bucket("comp", "composite", sources=self.source_aggs, size=self.size, **kwargs)

elastic_datashader/tilegen.py

Lines changed: 22 additions & 35 deletions
@@ -39,6 +39,7 @@
     get_nested_field_from_hit,
     to_32bit_float,
     Scan,
+    ScanAggs,
     get_tile_categories,
     scan
 )
@@ -852,23 +853,14 @@ def generate_nonaggregated_tile(
         raise

 @lru_cache
-def calculate_pixel_spread(geotile_precision: int) -> int:
+def calculate_pixel_spread(max_zoom: int, agg_zooms: int) -> int:
     '''
     Pixel spread is the number of pixels to put around each
     data point.
     '''
-    logger.debug('calculate_pixel_spread(%d)', geotile_precision)
+    logger.debug('calculate_pixel_spread(%d,%d)', max_zoom, agg_zooms)
+    return max(int(abs(max_zoom - agg_zooms)), 0)

-    if geotile_precision >= 20:
-        return geotile_precision // 4
-
-    if geotile_precision >= 15:
-        return 2
-
-    if geotile_precision >= 12:
-        return 1
-
-    return 0

 def apply_spread(img, spread):
     '''
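
Under the new rule the spread is simply the zoom gap between the maximum aggregation zoom and the zoom the data was actually aggregated at, so coarser aggregations get proportionally more padding. Illustrative calls with hypothetical zoom values, assuming the function above is in scope:

# spread = max(int(abs(max_zoom - agg_zooms)), 0)
print(calculate_pixel_spread(29, 25))  # 4: aggregated 4 zoom levels coarser, pad 4 px
print(calculate_pixel_spread(22, 22))  # 0: aggregated at full precision, no padding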
@@ -987,7 +979,6 @@ def generate_tile(idx, x, y, z, headers, params, tile_width_px=256, tile_height_
     # Now find out how many documents
     count_s = copy.copy(base_s)[0:0] # slice of array sets from/size since we are aggregating the data we don't need the hits
     count_s = count_s.filter("geo_bounding_box", **{geopoint_field: bb_dict})
-
     doc_cnt = count_s.count()
     logger.info("Document Count: %s", doc_cnt)
     metrics['doc_cnt'] = doc_cnt
@@ -1118,38 +1109,33 @@ def generate_tile(idx, x, y, z, headers, params, tile_width_px=256, tile_height_
11181109
span = None
11191110
if field_type == "geo_point":
11201111
geo_tile_grid = A("geotile_grid", field=geopoint_field, precision=geotile_precision, size=max_bins)
1121-
estimated_points_per_tile = get_estimated_points_per_tile(span_range, global_bounds, z, global_doc_cnt)
1112+
11221113
if params['bucket_min']>0 or params['bucket_max']<1:
1123-
if estimated_points_per_tile is None:
1114+
if global_doc_cnt is None:
11241115
# this isn't good we need a real number so lets query the max aggregation ammount
11251116
max_value_s = copy.copy(base_s)
11261117
bucket = max_value_s.aggs.bucket("comp", "geotile_grid", field=geopoint_field, precision=geotile_precision, size=1)
11271118
if category_field:
11281119
bucket.metric("sum", "sum", field=category_field, missing=0)
11291120
resp = max_value_s.execute()
11301121
if category_field:
1131-
estimated_points_per_tile = resp.aggregations.comp.buckets[0].sum['value']
1122+
global_doc_cnt = resp.aggregations.comp.buckets[0].sum['value']
11321123
else:
1133-
estimated_points_per_tile = resp.aggregations.comp.buckets[0].doc_count
1124+
global_doc_cnt = resp.aggregations.comp.buckets[0].doc_count
11341125

1135-
min_bucket = math.floor(math.exp(math.log(estimated_points_per_tile)*params['bucket_min']))
1136-
max_bucket = math.ceil(math.exp(math.log(estimated_points_per_tile)*params['bucket_max']))
1126+
min_bucket = math.floor(math.exp(math.log(global_doc_cnt)*params['bucket_min']))
1127+
max_bucket = math.ceil(math.exp(math.log(global_doc_cnt)*params['bucket_max']))
11371128
geo_tile_grid.pipeline("selector", "bucket_selector", buckets_path={"doc_count": "_count"}, script=f"params.doc_count >= {min_bucket} && params.doc_count <= {max_bucket}")
1138-
1139-
if inner_aggs is not None:
1140-
for agg_name, agg in inner_aggs.items():
1141-
geo_tile_grid.aggs[agg_name] = agg
1142-
tile_s.aggs["comp"] = geo_tile_grid
1143-
resp = Scan([tile_s], timeout=config.query_timeout_seconds)
1144-
# resp = ScanAggs(
1145-
# tile_s,
1146-
# {"grids": geo_tile_grid},
1147-
# inner_aggs,
1148-
# size=composite_agg_size,
1149-
# timeout=config.query_timeout_seconds
1150-
# ) # Dont use composite aggregator because you cannot use a bucket selector
1151-
1152-
1129+
if category_field:
1130+
geo_tile_grid = A("geotile_grid", field=geopoint_field, precision=geotile_precision)
1131+
resp = ScanAggs(tile_s,{"grids": geo_tile_grid},inner_aggs,size=composite_agg_size,timeout=config.query_timeout_seconds)
1132+
else:
1133+
if inner_aggs is not None:
1134+
for agg_name, agg in inner_aggs.items():
1135+
geo_tile_grid.aggs[agg_name] = agg
1136+
tile_s.aggs["comp"] = geo_tile_grid
1137+
resp = Scan([tile_s], timeout=config.query_timeout_seconds)
1138+
estimated_points_per_tile = get_estimated_points_per_tile(span_range, global_bounds, z, global_doc_cnt)
11531139
df = pd.DataFrame(
11541140
convert_composite(
11551141
resp.execute(),
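
The new branch exists because a bucket_selector pipeline cannot run under the composite aggregation that ScanAggs drives (as the removed comment noted), so category tiles take the composite path while the bucket_min/bucket_max selector stays on the plain Scan path. A minimal elasticsearch_dsl sketch of that selector, with a hypothetical field and bounds:

from elasticsearch_dsl import A

# Hypothetical values; the real bounds come from params['bucket_min'/'bucket_max']
min_bucket, max_bucket = 10, 100_000
grid = A("geotile_grid", field="location", precision=12, size=65536)
grid.pipeline(
    "selector",
    "bucket_selector",
    buckets_path={"doc_count": "_count"},
    script=f"params.doc_count >= {min_bucket} && params.doc_count <= {max_bucket}",
)
# Buckets whose doc_count falls outside [min_bucket, max_bucket] are dropped
# server-side before the tile is rendered.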
@@ -1359,7 +1345,8 @@ def remap_bucket(bucket, search):

    ###############################################################
    # Common
-    img = apply_spread(img, spread or calculate_pixel_spread(geotile_precision))
+    spread = spread or calculate_pixel_spread(max_agg_zooms, agg_zooms)
+    img = apply_spread(img, spread)
    img = img.to_bytesio().read()

    if partial_data:
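
apply_spread then pads each rendered datapoint by that many pixels. A hedged sketch of the call pattern, assuming apply_spread wraps datashader's transfer_functions.spread (the name apply_spread_sketch is illustrative, not the project's code):

import datashader.transfer_functions as tf

def apply_spread_sketch(img, spread):
    # Pad every non-empty pixel by `spread` pixels in each direction;
    # spread == 0 leaves the image untouched.
    if spread and spread > 0:
        return tf.spread(img, px=spread)
    return img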
