datacommonsorg · abhishekjaisw · Jan 29, 2026 · Jan 27, 2026 · Jan 27, 2026 · Jan 28, 2026
diff --git a/statvar_imports/statistics_poland/README.md b/statvar_imports/statistics_poland/README.md
@@ -38,10 +38,9 @@ python3 tools/statvar_importer/stat_var_processor.py \
 **For Main data run**
 ```bash
 python3 tools/statvar_importer/stat_var_processor.py \
-    --input_data='statvar_imports/statistics_poland/poland_input/StatisticsPoland_input.csv' \
+    --input_data='statvar_imports/statistics_poland/StatisticsPoland_input.csv' \
     --pv_map='statvar_imports/statistics_poland/StatisticsPoland_pvmap.csv' \
-    --output_path='statvar_imports/statistics_poland/poland_output/StatisticsPoland_output' \
+    --output_path='statvar_imports/statistics_poland/StatisticsPoland_output' \
     --config_file='statvar_imports/statistics_poland/Statistics_Poland_metadata.csv' \
     --existing_statvar_mcf=gs://unresolved_mcf/scripts/statvar/stat_vars.mcf
 ```
-
diff --git a/statvar_imports/statistics_poland/download_input_data.py b/statvar_imports/statistics_poland/download_input_data.py
@@ -1,31 +1,39 @@
 import pandas as pd
 import os
 import logging
+import sys
 from datetime import datetime
 
 # Configure logging
 logging.basicConfig(
-    level=logging.INFO,
+    level=logging.INFO, 
     format='%(levelname)s: %(message)s'
 )
 
-# Configuration
-INPUT_FILE = "statvar_imports/statistics_poland/poland_data_sample/poland_raw.xlsx"
-OUTPUT_DIR = "statvar_imports/statistics_poland/poland_input"
-OUTPUT_FILE = os.path.join(OUTPUT_DIR, "StatisticsPoland_input.csv")
+# --- FLATTENED PATH LOGIC ---
+# Get the directory where THIS script is actually saved
+BASE_PATH = os.path.dirname(os.path.abspath(__file__))
+
+# Input remains in the sample subfolder
+INPUT_FILE = os.path.join(BASE_PATH, "poland_data_sample/poland_raw.xlsx")
+
+# Output is now saved directly in BASE_PATH (the root of the import folder)
+# This ensures stat_var_processor.py can find it in the Cloud environment
+OUTPUT_FILE = os.path.join(BASE_PATH, "StatisticsPoland_input.csv")
 
-# Target functional age groups
 TARGET_AGES = [
     "0-2", "3-6", "7-12", "13-15", "16-19", "20-24", 
     "25-34", "35-44", "45-54", "55-64", "65 i więcej"
 ]
 
 def process_poland_pivot():
+    # Verify input exists
     if not os.path.exists(INPUT_FILE):
-        logging.error(f"{INPUT_FILE} not found.")
-        return
+        logging.error(f"CRITICAL ERROR: {INPUT_FILE} not found.")
+        # Exit with a non-zero status so the automation executor stops here
+        sys.exit(1)
 
-    logging.info(f"Starting generic processing. Saving to: {OUTPUT_FILE}")
+    logging.info(f"Processing data from: {INPUT_FILE}")
 
     try:
         # 1. Load the 'DANE' sheet
@@ -35,7 +43,6 @@ def process_poland_pivot():
         # 2. Generic Filtering
         df = df[df['Age'].isin(TARGET_AGES)]
 
-        # DYNAMIC YEAR LOGIC
         current_year = datetime.now().year
         available_years = sorted([y for y in df['Year'].unique() if y <= current_year])
         df = df[df['Year'].isin(available_years)]
@@ -51,7 +58,6 @@ def process_poland_pivot():
             '65 i więcej': '65 and more'
         }
 
-        # Refactored repetitive replace calls into a loop
         for col in ['Sex', 'Location', 'Name', 'Age']:
             df[col] = df[col].replace(translations)
 
@@ -62,21 +68,21 @@ def process_poland_pivot():
             values='Value'
         )
 
-        # 5. Format Geographic Codes (ensuring 7-digit padding)
+        # 5. Format Geographic Codes
         pivot_df.index = pivot_df.index.set_levels(
             pivot_df.index.levels[0].astype(str).str.zfill(7), level=0
         )
 
-        # 6. Save result
-        os.makedirs(OUTPUT_DIR, exist_ok=True)
+        # 6. Save result directly to BASE_PATH
+        # encoding='utf-8' is crucial for Polish characters
         pivot_df.to_csv(OUTPUT_FILE, encoding='utf-8')
 
-        logging.info(f"SUCCESS: {OUTPUT_FILE} has been updated.")
+        logging.info(f"SUCCESS: {OUTPUT_FILE} created in the root directory.")
         logging.info(f"Years Included: {available_years}")
-        logging.info(f"Total Geographies Processed: {pivot_df.shape[0]}")
 
     except Exception as e:
         logging.error(f"Processing Error: {e}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     process_poland_pivot()
diff --git a/statvar_imports/statistics_poland/manifest.json b/statvar_imports/statistics_poland/manifest.json
@@ -9,15 +9,15 @@
             "provenance_description": "Population data for demographic variables such as population counts, age distributions, and other census-related metrics in Poland",
             "scripts": [
                 "download_input_data.py",
-                "../../tools/statvar_importer/stat_var_processor.py --input_data=poland_input/StatisticsPoland_input.csv --pv_map=StatisticsPoland_pvmap.csv --config_file=Statistics_Poland_metadata.csv --output_path=poland_output/StatisticsPoland_output"
+                "../../tools/statvar_importer/stat_var_processor.py --input_data=StatisticsPoland_input.csv --pv_map=StatisticsPoland_pvmap.csv --config_file=Statistics_Poland_metadata.csv --output_path=StatisticsPoland_output"
             ],
             "source_files": [
-                "poland_input/StatisticsPoland_input.csv"
+                "StatisticsPoland_input.csv"
             ],
             "import_inputs": [
                 {
-                    "template_mcf": "poland_output/StatisticsPoland_output.tmcf",
-                    "cleaned_csv": "poland_output/StatisticsPoland_output.csv"
+                    "template_mcf": "StatisticsPoland_output.tmcf",
+                    "cleaned_csv": "StatisticsPoland_output.csv"
                 }
             ],
             "cron_schedule": "0 0 1 1,4,7,10 *"