|
1 | 1 | from dataclasses import asdict, dataclass |
2 | 2 | from functools import lru_cache |
3 | 3 | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union |
4 | | - |
| 4 | +from datetime import datetime, timezone |
5 | 5 | import copy |
6 | 6 | import math |
7 | 7 | import time |
|
32 | 32 | gen_overlay, |
33 | 33 | ) |
34 | 34 | from .elastic import ( |
| 35 | + parse_duration_interval, |
35 | 36 | get_field_type, |
36 | 37 | get_search_base, |
37 | 38 | convert_composite, |
@@ -1187,7 +1188,7 @@ def generate_tile(idx, x, y, z, headers, params, tile_width_px=256, tile_height_ |
1187 | 1188 | resp = max_value_s.execute() |
1188 | 1189 | estimated_points_per_tile = resp.aggregations.comp.buckets[0].doc_count |
1189 | 1190 | span = [0,estimated_points_per_tile] |
1190 | | - logger.info("EST Points: %s",estimated_points_per_tile) |
| 1191 | + logger.info("EST Points: %s %s",estimated_points_per_tile,category_field) |
1191 | 1192 |
|
1192 | 1193 | searches = [] |
1193 | 1194 | composite_agg_size = 65536#max agg bucket size |
@@ -1230,6 +1231,13 @@ def remap_bucket(bucket,search): |
1230 | 1231 | if category_field: |
1231 | 1232 | #bucket_callback = calc_aggregation #don't run a sub query. sub aggregation worked But we might want to leave this in for cross index searches |
1232 | 1233 | bucket_callback = remap_bucket |
| 1234 | + |
| 1235 | + if params['timeOverlap']:#run scan using date intervals to check overlaps during the same time |
| 1236 | + subtile_bb_dict = create_bounding_box_for_tile(x, y, z) |
| 1237 | + interval = params['timeOverlapSize'] |
| 1238 | + logger.info("CREATING TIMEBUCKETS %s",interval) |
| 1239 | + searches = create_time_interval_searches(base_s,subtile_bb_dict,start_time,stop_time,timestamp_field,geopoint_field,geotile_precision,composite_agg_size,category_field,interval) |
| 1240 | + |
1233 | 1241 | resp = Scan(searches,timeout=config.query_timeout_seconds,bucket_callback=bucket_callback) |
1234 | 1242 | df = pd.DataFrame( |
1235 | 1243 | convert_composite( |
@@ -1370,3 +1378,55 @@ def remap_bucket(bucket,search): |
1370 | 1378 | "An exception occured while attempting to generate a tile:" |
1371 | 1379 | ) |
1372 | 1380 | raise |
| 1381 | + |
| 1382 | + |
def create_time_interval_searches(base_s, subtile_bb_dict, start_time, stop_time, timestamp_field, geopoint_field, geotile_precision, composite_agg_size, category_field, interval="auto"):
    """Split one tile query into multiple searches, one per time interval.

    Used for time-overlap rendering: instead of a single geotile_grid
    aggregation over the whole [start_time, stop_time] window, build a list
    of searches where each search covers one time bucket, so points that
    co-occur within the same interval can be detected.

    Parameters
    ----------
    base_s : elasticsearch_dsl.Search
        Base search to copy for each interval query.
    subtile_bb_dict : dict
        geo_bounding_box body for this tile; also passed as ``bounds`` to the
        geotile_grid aggregation.
    start_time, stop_time : datetime
        Overall time window being rendered.
    timestamp_field : str
        Name of the document timestamp field to histogram/range-filter on.
    geopoint_field : str
        Name of the geo_point field.
    geotile_precision : int
        Zoom precision for the geotile_grid aggregation.
    composite_agg_size : int
        Max bucket count for the geotile_grid aggregation.
    category_field : str or None
        If set, add a ``sum`` metric over this field to each grid bucket.
    interval : str
        Either "auto" (ask Elasticsearch for a sensible interval via
        auto_date_histogram, then build one search per returned bucket) or a
        fixed duration string understood by ``parse_duration_interval``.

    Returns
    -------
    list of elasticsearch_dsl.Search
        One search per time interval.
    """
    searches = []

    if interval == "auto":
        # Probe query: let Elasticsearch choose the interval. 546 target
        # buckets keeps the per-tile search count bounded.
        probe_s = copy.copy(base_s)
        probe_s = probe_s[0:0]
        probe_s = probe_s.filter("geo_bounding_box", **{geopoint_field: subtile_bb_dict})
        # Fix: histogram the configured timestamp_field (was hard-coded to
        # "lastupdated", which broke any index using a different field name).
        probe_s.aggs.bucket("by_time", "auto_date_histogram", field=timestamp_field, buckets=546)
        resp = probe_s.execute()
        interval = resp.aggregations.by_time.interval
        # Create a search for each returned bucket spanning [key, key+interval).
        logger.info("Doing multiple queries based on interval %s", interval)

        for time_bucket in resp.aggregations.by_time:
            subtile_s = copy.copy(base_s)
            bucket_start_time = datetime.strptime(
                time_bucket.key_as_string, "%Y-%m-%dT%H:%M:%S.%fZ"
            ).replace(tzinfo=timezone.utc)
            bucket_stop_time = bucket_start_time + parse_duration_interval(interval)

            if timestamp_field:
                time_range = {timestamp_field: {}}
                if bucket_start_time is not None:
                    time_range[timestamp_field]["gte"] = bucket_start_time
                # Fix: guard the value actually assigned (was testing
                # stop_time while assigning bucket_stop_time).
                if bucket_stop_time is not None:
                    time_range[timestamp_field]["lte"] = bucket_stop_time
                if time_range[timestamp_field]:
                    subtile_s = subtile_s.filter("range", **time_range)
            # NOTE(review): unlike the fixed-interval branch below, these
            # searches carry no geo_bounding_box filter (only the grid
            # ``bounds``) and no doc_count >= 2 bucket_selector — confirm
            # whether that asymmetry is intentional.
            grid = subtile_s.aggs.bucket(
                "comp",
                "geotile_grid",
                field=geopoint_field,
                precision=geotile_precision,
                size=composite_agg_size,
                bounds=subtile_bb_dict,
            )
            if category_field:
                grid.metric("sum", "sum", field=category_field, missing=0)
            searches.append(subtile_s)
        return searches

    # Fixed interval: walk the window in interval-sized steps.
    stime = start_time
    while stime < stop_time:
        subtile_s = copy.copy(base_s)
        subtile_s = subtile_s.filter("geo_bounding_box", **{geopoint_field: subtile_bb_dict})
        subtile_s = subtile_s[0:0]
        bucket_start_time = stime
        bucket_stop_time = bucket_start_time + parse_duration_interval(interval)
        time_range = {timestamp_field: {"gte": bucket_start_time, "lte": bucket_stop_time}}
        stime = bucket_stop_time
        subtile_s = subtile_s.filter("range", **time_range)
        grid = subtile_s.aggs.bucket(
            "comp",
            "geotile_grid",
            field=geopoint_field,
            precision=geotile_precision,
            size=composite_agg_size,
            bounds=subtile_bb_dict,
        )
        # Only keep grid cells with 2+ docs in the same interval — a single
        # point cannot overlap itself.
        grid.pipeline(
            "selector",
            "bucket_selector",
            buckets_path={"doc_count": "_count"},
            script="params.doc_count >= 2",
        )
        if category_field:
            grid.metric("sum", "sum", field=category_field, missing=0)
        searches.append(subtile_s)
    return searches
0 commit comments