Skip to content

Commit a98a593

Browse files
committed
[scripts] Add script to export all user's vector layers
1 parent c78de26 commit a98a593

File tree

1 file changed

+194
-0
lines changed

1 file changed

+194
-0
lines changed

scripts/forge_export_layers.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""
2+
Export vector layers and metadata using ForgeClient.
3+
4+
For each project in the user account, creates as many folders as the images inside; each
raster folder then contains - for each detection on it - the metadata about the detection
and the geometries.
7+
8+
Usage:
9+
python scripts/forge_export_layers.py output_dir
10+
11+
Requires `PICTERRA_API_KEY` environment variable.
12+
13+
Usage
14+
-----
15+
16+
Set your API key in the environment and run the script:
17+
18+
```bash
19+
export PICTERRA_API_KEY="your_api_key_here"
20+
python scripts/forge_export_layers.py /path/to/output
21+
```
22+
23+
You can use the '--limit' option to test downloading only N layers.
24+
25+
You can use the '--skip' option to skip some layers: the script will always output
26+
a CSV file ('--csv-output') which contains the exported ids.
27+
28+
29+
Output layout
30+
-------------
31+
32+
For each vector layer a folder `<vector_id>_<safe_name>/` will be created containing:
33+
34+
- `vector.geojson` : downloaded GeoJSON of the vector layer
35+
- `metadata.json` : JSON with folder, raster, vector layer and detector metadata
36+
37+
"""
38+
import argparse
39+
import csv
40+
import json
41+
import os
42+
import re
43+
from typing import Any
44+
45+
from picterra.forge_client import ForgeClient
46+
47+
48+
def safe_name(name: str) -> str:
    """Return *name* made safe for use as a file/directory name.

    Every character that is not an ASCII alphanumeric, dot, underscore or
    hyphen is replaced with an underscore, and the result is truncated to
    200 characters to stay well below filesystem name-length limits.
    """
    cleaned = re.sub(r"[^0-9A-Za-z._-]", "_", name)
    return cleaned[:200]
50+
51+
52+
def drain_results_page(page):
    """Flatten a chain of paginated results into one list.

    Starting from *page* (or ``None`` for no results), iterates each page's
    items and follows ``page.next()`` until it yields ``None``.
    """
    collected = []
    current = page
    while current is not None:
        collected.extend(current)
        current = current.next()
    return collected
58+
59+
60+
def csv_to_set(csv_file: str) -> set[str]:
    """Read a CSV file and return the set of all its cell values.

    Args:
        csv_file: Path to an existing CSV file.

    Returns:
        A set containing every cell of every row, whitespace-stripped.

    Raises:
        FileNotFoundError: If ``csv_file`` does not point to an existing file.
    """
    # Raise explicitly instead of using `assert`: asserts are stripped
    # when Python runs with -O, silently skipping the validation.
    if not os.path.isfile(csv_file):
        raise FileNotFoundError(f"CSV file not found: {csv_file}")
    # Open the CSV file and read its content
    with open(csv_file, newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # Strip whitespace so ids compare cleanly regardless of CSV formatting.
        return {cell.strip() for row in reader for cell in row}
71+
72+
73+
def append_id_to_single_row_csv(csv_file: str, new_id: str):
    """Append *new_id* as a new cell to the single-row CSV at *csv_file*.

    The file is created with a one-cell row if it does not exist yet.

    Args:
        csv_file: Path of the CSV file holding a single row of ids.
        new_id: Identifier to append to that row.
    """
    row: list[str] = []
    # Read the existing (single) row if the file exists
    if os.path.isfile(csv_file):
        with open(csv_file, mode='r', newline='', encoding='utf-8') as file:
            reader = csv.reader(file)
            # Use a default of [] so an existing-but-empty file does not
            # raise StopIteration (the previous bare next(reader) did).
            row = next(reader, [])

    # Append the new ID to the row
    row.append(new_id)

    # Write the updated row back to the CSV
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(row)
94+
95+
96+
def main():
    """Export every vector layer of the authenticated user under ``outdir``.

    Walks folders -> rasters -> vector layers, downloads each layer's
    GeoJSON plus a ``metadata.json`` describing the folder/raster/detector,
    and appends each exported id to a one-row CSV so later runs can skip
    already-exported layers via ``--skip``.

    Requires the ``PICTERRA_API_KEY`` environment variable.
    """
    # Parse command line arguments
    p = argparse.ArgumentParser()
    p.add_argument("outdir", help="Directory to write outputs")
    # type=int: a non-numeric value now fails at parse time instead of
    # crashing mid-export at the first `int(limit)` conversion.
    p.add_argument("--limit", type=int, help="Export only a max number of layers")
    p.add_argument("--skip", help="CSV path with vector layer IDs to skip")
    p.add_argument("--csv-output", help="CSV path for exported vector layer IDs", default="out.csv")
    args = p.parse_args()
    outdir = str(args.outdir)
    limit: int | None = args.limit
    skip_set = csv_to_set(args.skip) if args.skip else set()
    csv_output: str = args.csv_output
    os.makedirs(outdir, exist_ok=True)

    # Create ForgeClient (needs PICTERRA_API_KEY env var)
    client = ForgeClient()

    # Build a detector-id -> detector-metadata map once, so each vector
    # layer can be joined with its detector without extra API calls.
    detectors_map: dict[str, Any] = {}
    print("Listing detectors...")
    dp = client.list_detectors()
    for det in drain_results_page(dp):
        detectors_map[det["id"]] = det
    print(f"Found {len(detectors_map)} detectors\n")

    # Get folders via internal paginated endpoint.
    # NOTE(review): `_return_results_page` is a private client API; confirm
    # there is no public folder-listing alternative before relying on it.
    print("Listing folders...")
    folders_page = client._return_results_page("folders", None)
    folders = drain_results_page(folders_page)
    print(f"Found {len(folders)} folders\n")

    count = 0
    for folder in folders:
        folder_id = folder.get("id")
        folder_name = folder.get("name")
        print(f"\nFolder: {folder_name} ({folder_id})")
        # List rasters in folder
        rp = client.list_rasters(folder_id=folder_id)
        rasters = drain_results_page(rp)
        for raster in rasters:
            raster_id = raster.get("id")
            raster_name = raster.get("name")
            print(f" Raster: {raster_name} ({raster_id})")
            # List vector layers for raster
            vpage = client.list_raster_vector_layers(raster_id=raster_id)
            vlayers = drain_results_page(vpage)
            for vl in vlayers:
                vl_id = vl.get("id")
                if vl_id in skip_set:
                    print(f"Skip {vl_id}")
                    continue
                vl_name = vl.get("name")
                detector_id = vl.get("detector_id")
                detector_data = detectors_map.get(detector_id)
                if detector_data is None:
                    # Layers whose detector is unknown are skipped.
                    print(f"Skip {vl_name}")
                    continue
                detector_name = detector_data["name"]
                folder_for_vl = os.path.join(
                    outdir,
                    safe_name(folder_name),
                    safe_name(raster_name),
                    f"{safe_name(detector_name)}_{safe_name(vl_name)}"
                )
                os.makedirs(folder_for_vl, exist_ok=True)

                geojson_path = os.path.join(folder_for_vl, "vector.geojson")
                metadata_path = os.path.join(folder_for_vl, "metadata.json")

                print(f" Downloading vector layer {vl_name} ({vl_id}) -> {geojson_path}")
                try:
                    client.download_vector_layer_to_file(vl_id, geojson_path)
                except Exception as e:
                    # Best-effort export: log the failure and move on.
                    print(f" Error downloading layer {vl_id}: {e}")
                    continue
                metadata = {
                    "id": vl_id,
                    "name": vl_name,
                    "count": vl.get("count"),
                    "created_at": vl.get("created_at"),
                    "folder": {"id": folder_id, "name": folder_name},
                    "raster": {"id": raster_id, "name": raster_name},
                    "detector": {"id": detector_id, "name": detector_name},
                }
                with open(metadata_path, "w") as f:
                    json.dump(metadata, f, indent=2)
                # Record the id only after a successful download, so a rerun
                # with --skip never misses a layer.
                append_id_to_single_row_csv(csv_output, vl_id)
                count += 1
                # `is not None` (not truthiness) so `--limit 0` still means
                # "stop immediately", matching the original string behavior.
                if limit is not None and count >= limit:
                    break
            if limit is not None and count >= limit:
                break
        if limit is not None and count >= limit:
            break
    print(f"Exported {count} vector layers, their ids are in {csv_output}")
191+
192+
193+
# Script entry point: run the export only when executed directly.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)