diff --git a/prisma/migrations/20260414200000_add_admin_level_1/migration.sql b/prisma/migrations/20260414200000_add_admin_level_1/migration.sql
new file mode 100644
index 00000000..716a8693
--- /dev/null
+++ b/prisma/migrations/20260414200000_add_admin_level_1/migration.sql
@@ -0,0 +1,87 @@
+-- Migration: Add admin_level_1 field to aed_locations
+--
+-- admin_level_1 stores the first-level administrative division (region/state)
+-- derived from coordinates via Nominatim reverse geocoding.
+-- This is universal: "Comunidad de Madrid" (ES), "Île-de-France" (FR),
+-- "Bayern" (DE), "California" (US), etc.
+--
+-- Step 1: Add the column
+-- Step 2: Backfill for Spain using the province INE code → community mapping
+-- Step 3: Add index for geographic queries
+
+-- Step 1: Add column
+ALTER TABLE "aed_locations" ADD COLUMN IF NOT EXISTS "admin_level_1" TEXT;
+
+-- Step 2: Backfill existing Spanish records from city_code/postal_code → community name
+-- Uses COALESCE(city_code, postal_code) to get the best available province prefix.
+-- The first 2 digits of both fields are the INE province code in Spain.
+-- The prefix only has this meaning for Spain, so the update is restricted to
+-- locations whose AED is stored with country_code 'ES'. Records from other
+-- countries stay NULL until the Nominatim enrichment script fills them.
+UPDATE "aed_locations"
+SET "admin_level_1" = CASE LEFT(COALESCE(NULLIF("city_code", ''), NULLIF("postal_code", '')), 2)
+  WHEN '01' THEN 'País Vasco'
+  WHEN '02' THEN 'Castilla-La Mancha'
+  WHEN '03' THEN 'Comunitat Valenciana'
+  WHEN '04' THEN 'Andalucía'
+  WHEN '05' THEN 'Castilla y León'
+  WHEN '06' THEN 'Extremadura'
+  WHEN '07' THEN 'Illes Balears'
+  WHEN '08' THEN 'Cataluña'
+  WHEN '09' THEN 'Castilla y León'
+  WHEN '10' THEN 'Extremadura'
+  WHEN '11' THEN 'Andalucía'
+  WHEN '12' THEN 'Comunitat Valenciana'
+  WHEN '13' THEN 'Castilla-La Mancha'
+  WHEN '14' THEN 'Andalucía'
+  WHEN '15' THEN 'Galicia'
+  WHEN '16' THEN 'Castilla-La Mancha'
+  WHEN '17' THEN 'Cataluña'
+  WHEN '18' THEN 'Andalucía'
+  WHEN '19' THEN 'Castilla-La Mancha'
+  WHEN '20' THEN 'País Vasco'
+  WHEN '21' THEN 'Andalucía'
+  WHEN '22' THEN 'Aragón'
+  WHEN '23' THEN 'Andalucía'
+  WHEN '24' THEN 'Castilla y León'
+  WHEN '25' THEN 'Cataluña'
+  WHEN '26' THEN 'La Rioja'
+  WHEN '27' THEN 'Galicia'
+  WHEN '28' THEN 'Comunidad de Madrid'
+  WHEN '29' THEN 'Andalucía'
+  WHEN '30' THEN 'Región de Murcia'
+  WHEN '31' THEN 'Navarra'
+  WHEN '32' THEN 'Galicia'
+  WHEN '33' THEN 'Asturias'
+  WHEN '34' THEN 'Castilla y León'
+  WHEN '35' THEN 'Canarias'
+  WHEN '36' THEN 'Galicia'
+  WHEN '37' THEN 'Castilla y León'
+  WHEN '38' THEN 'Canarias'
+  WHEN '39' THEN 'Cantabria'
+  WHEN '40' THEN 'Castilla y León'
+  WHEN '41' THEN 'Andalucía'
+  WHEN '42' THEN 'Castilla y León'
+  WHEN '43' THEN 'Cataluña'
+  WHEN '44' THEN 'Aragón'
+  WHEN '45' THEN 'Castilla-La Mancha'
+  WHEN '46' THEN 'Comunitat Valenciana'
+  WHEN '47' THEN 'Castilla y León'
+  WHEN '48' THEN 'País Vasco'
+  WHEN '49' THEN 'Castilla y León'
+  WHEN '50' THEN 'Aragón'
+  WHEN '51' THEN 'Ceuta'
+  WHEN '52' THEN 'Melilla'
+  ELSE NULL
+END
+WHERE "admin_level_1" IS NULL
+  AND COALESCE(NULLIF("city_code", ''), NULLIF("postal_code", '')) IS NOT NULL
+  AND EXISTS (
+    SELECT 1
+    FROM "aeds" a
+    WHERE a."location_id" = "aed_locations"."id"
+      AND UPPER(a."country_code") = 'ES'
+  );
+
+-- Step 3: Add index for geographic hierarchy queries
+CREATE INDEX IF NOT EXISTS "idx_aed_locations_admin_level_1" ON "aed_locations" ("admin_level_1");
diff --git a/prisma/migrations/20260414300000_add_nominatim_verified_at/migration.sql b/prisma/migrations/20260414300000_add_nominatim_verified_at/migration.sql
new file mode 100644
index 00000000..211f096d
--- /dev/null
+++ b/prisma/migrations/20260414300000_add_nominatim_verified_at/migration.sql
@@ -0,0 +1,3 @@
+-- Add nominatim_verified_at to track which records have been verified via Nominatim
+-- This allows the enrichment script to skip already-verified records on re-runs
+ALTER TABLE "aed_locations" ADD COLUMN IF NOT EXISTS "nominatim_verified_at" TIMESTAMPTZ;
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index b71c74af..667cf011 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -342,6 +342,7 @@ model AedLocation {
   // Geographic information by city (no FK - multi-city support)
   city_name String?
   city_code String?
+  admin_level_1 String? // Region/State from reverse geocoding (e.g., "Comunidad de Madrid", "Île-de-France")
   district_code String?
   district_name String?
   neighborhood_code String?
@@ -353,7 +354,8 @@ model AedLocation {
   access_instructions String? // How to access the AED
 
   // Geocoding enrichment validation (automatic coordinate validation)
-  geocoding_validation Json? // Validation result from geocoding enrichment: {status, distance_meters, original_coords, geocoded_coords, reason, validated_at}
+  geocoding_validation Json? // Validation result from geocoding enrichment: {status, distance_meters, original_coords, geocoded_coords, reason, validated_at}
+  nominatim_verified_at DateTime? @db.Timestamptz(6) // When admin_level_1 was last verified via Nominatim reverse geocoding (column is TIMESTAMPTZ in the migration)
 
   aed Aed?
   address_validation AedAddressValidation?
@@ -364,6 +366,7 @@ model AedLocation {
   @@index([postal_code])
   @@index([city_name], map: "idx_aed_locations_city_name")
   @@index([city_code], map: "idx_aed_locations_city_code")
+  @@index([admin_level_1], map: "idx_aed_locations_admin_level_1")
   @@index([city_code, district_code])
   @@index([city_code, postal_code])
   @@map("aed_locations")
diff --git a/public/58a9f826f33ed4f000b44c285246099e.txt b/public/58a9f826f33ed4f000b44c285246099e.txt
new file mode 100644
index 00000000..5af20ed5
--- /dev/null
+++ b/public/58a9f826f33ed4f000b44c285246099e.txt
@@ -0,0 +1 @@
+58a9f826f33ed4f000b44c285246099e
\ No newline at end of file
diff --git a/public/7b8f16c9e24b4a02b2bf3e1f5c9d8a3e.txt b/public/7b8f16c9e24b4a02b2bf3e1f5c9d8a3e.txt
new file mode 100644
index 00000000..3a7d6a85
--- /dev/null
+++ b/public/7b8f16c9e24b4a02b2bf3e1f5c9d8a3e.txt
@@ -0,0 +1 @@
+7b8f16c9e24b4a02b2bf3e1f5c9d8a3e
\ No newline at end of file
diff --git a/scripts/enrich-geography.ts b/scripts/enrich-geography.ts
new file mode 100644
index 00000000..1aeaa7b3
--- /dev/null
+++ b/scripts/enrich-geography.ts
@@ -0,0 +1,478 @@
+/**
+ * Geographic Enrichment Script
+ *
+ * Runs locally to enrich AED records with geographic hierarchy data from Nominatim.
+ * Populates admin_level_1 (region/state) using coordinates as the source of truth.
+ *
+ * Safety features:
+ * - Skips records where Nominatim country != stored country_code (wrong coordinates)
+ * - Skips Null Island (0,0) coordinates
+ * - Caches geocode results for identical coordinates (saves API calls)
+ * - Marks processed records with nominatim_verified_at to support resume
+ * - Rejects malformed numeric options instead of silently processing every record
+ *
+ * Usage:
+ *   npx tsx scripts/enrich-geography.ts [options]
+ *
+ * Options:
+ *   --dry-run          Preview changes without writing to DB (default: true)
+ *   --write            Actually write changes to DB
+ *   --limit N          Process only N records (N >= 1; default: all)
+ *   --offset N         Skip first N records (N >= 0; default: 0)
+ *   --only-unverified  Only process records not yet verified by Nominatim (default: true)
+ *   --all              Process all records, even already verified ones
+ *   --fix-city-code    Also fix city_code prefix from Nominatim result
+ *   --country ES       Only process records from this country
+ *   --report           Generate CSV reports (mismatches + wrong countries)
+ */
+
+import { PrismaPg } from "@prisma/adapter-pg";
+import { PrismaClient } from "../src/generated/client/client";
+import { reverseGeocode, type ReverseGeocodeResult } from "../src/lib/nominatim";
+
+// --- Coordinate validation ---
+
+/** True when both coordinates lie within 0.1 degrees of (0, 0), the "Null Island" placeholder. */
+function isNullIsland(lat: number, lon: number): boolean {
+  return Math.abs(lat) < 0.1 && Math.abs(lon) < 0.1;
+}
+
+// --- Geocode result cache ---
+// Keyed by coordKey(). Failed lookups are stored as well, so a coordinate that
+// failed once is not re-queried during the same run.
+const geocodeCache = new Map<string, ReverseGeocodeResult | null>();
+
+/** Cache key: both coordinates rounded to 6 decimal places. */
+function coordKey(lat: number, lon: number): string {
+  return `${lat.toFixed(6)},${lon.toFixed(6)}`;
+}
+
+/**
+ * Reverse geocodes a coordinate, reusing the result of an earlier call for the same key.
+ * NOTE(review): assumes reverseGeocode resolves to null when the lookup fails — confirm in src/lib/nominatim.
+ */
+async function cachedReverseGeocode(
+  lat: number,
+  lon: number
+): Promise<ReverseGeocodeResult | null> {
+  const key = coordKey(lat, lon);
+  const cached = geocodeCache.get(key);
+  if (cached !== undefined) return cached;
+  const result = await reverseGeocode(lat, lon);
+  geocodeCache.set(key, result);
+  return result;
+}
+
+// --- CLI ---
+
+interface Options {
+  dryRun: boolean;
+  limit: number | null;
+  offset: number;
+  onlyUnverified: boolean;
+  fixCityCode: boolean;
+  country: string | null;
+  report: boolean;
+}
+
+/**
+ * Parses the value of a numeric flag.
+ *
+ * @throws Error when the value is missing or is not an integer >= `min`.
+ */
+function parseIntOption(flag: string, raw: string | undefined, min: number): number {
+  const value = raw !== undefined && /^\d+$/.test(raw) ? Number(raw) : NaN;
+  if (!Number.isSafeInteger(value) || value < min) {
+    throw new Error(`${flag} expects an integer >= ${min}, got: ${raw ?? "(nothing)"}`);
+  }
+  return value;
+}
+
+/**
+ * Builds the run options from process.argv. Defaults to a dry run over all
+ * unverified records; unknown arguments are reported and ignored.
+ *
+ * @throws Error when --limit, --offset or --country is given without a usable value.
+ */
+function parseArgs(): Options {
+  const args = process.argv.slice(2);
+  const opts: Options = {
+    dryRun: true,
+    limit: null,
+    offset: 0,
+    onlyUnverified: true,
+    fixCityCode: false,
+    country: null,
+    report: false,
+  };
+
+  for (let i = 0; i < args.length; i++) {
+    switch (args[i]) {
+      case "--write":
+        opts.dryRun = false;
+        break;
+      case "--dry-run":
+        opts.dryRun = true;
+        break;
+      case "--limit":
+        opts.limit = parseIntOption("--limit", args[++i], 1);
+        break;
+      case "--offset":
+        opts.offset = parseIntOption("--offset", args[++i], 0);
+        break;
+      case "--only-unverified":
+        opts.onlyUnverified = true;
+        break;
+      case "--all":
+        opts.onlyUnverified = false;
+        break;
+      case "--fix-city-code":
+        opts.fixCityCode = true;
+        break;
+      case "--country": {
+        const code = args[++i];
+        if (!code || code.startsWith("--")) {
+          throw new Error("--country expects a country code, e.g. --country ES");
+        }
+        opts.country = code.toUpperCase();
+        break;
+      }
+      case "--report":
+        opts.report = true;
+        break;
+      default:
+        console.warn(`Ignoring unknown argument: ${args[i]}`);
+    }
+  }
+
+  return opts;
+}
+
+/** Row of the city_code mismatch report. */
+interface Mismatch {
+  aedId: string;
+  cityName: string | null;
+  currentAdmin: string | null;
+  nominatimAdmin: string | null;
+  currentCityCode: string | null;
+  nominatimPostalCode: string | null;
+  lat: number;
+  lon: number;
+}
+
+/** Row of the wrong-country report. */
+interface WrongCountry {
+  aedId: string;
+  name: string | null;
+  cityName: string | null;
+  storedCountry: string;
+  nominatimCountry: string;
+  lat: number;
+  lon: number;
+}
+
+/** Fields of aed_locations this script may write. */
+interface LocationUpdate {
+  nominatim_verified_at: Date;
+  admin_level_1?: string;
+  city_code?: string;
+  postal_code?: string;
+}
+
+/** Wraps a text value in double quotes for CSV, doubling embedded quotes (RFC 4180). */
+function csvQuoted(value: string | null): string {
+  return `"${(value ?? "").replace(/"/g, '""')}"`;
+}
+
+/**
+ * Loads the matching AEDs, reverse geocodes each one sequentially and, in write
+ * mode, stores the result on its location row. Prints a summary and optionally
+ * writes CSV reports to the current working directory.
+ */
+async function main() {
+  const opts = parseArgs();
+  const connectionString = process.env.DATABASE_URL;
+  if (!connectionString) {
+    console.error("DATABASE_URL environment variable is required");
+    process.exit(1);
+  }
+  const adapter = new PrismaPg({ connectionString });
+  const prisma = new PrismaClient({ adapter });
+  const startTime = Date.now();
+
+  console.log("=== Geographic Enrichment Script ===");
+  console.log(`Mode: ${opts.dryRun ? "DRY RUN (no writes)" : "WRITE MODE"}`);
+  console.log(`Options:`, JSON.stringify(opts, null, 2));
+  console.log();
+
+  try {
+    // Build WHERE clause
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const where: any = {
+      latitude: { not: null },
+      longitude: { not: null },
+    };
+
+    if (opts.onlyUnverified) {
+      where.location = { nominatim_verified_at: null };
+    }
+
+    if (opts.country) {
+      where.country_code = opts.country;
+    }
+
+    const totalCount = await prisma.aed.count({ where });
+    // Rows actually reachable once --offset is applied
+    const available = Math.max(totalCount - opts.offset, 0);
+    const processCount = opts.limit !== null ? Math.min(opts.limit, available) : available;
+
+    console.log(`Total AEDs matching criteria: ${totalCount}`);
+    console.log(`Will process: ${processCount} (offset: ${opts.offset})`);
+    console.log(
+      `Estimated time: ~${Math.ceil((processCount * 1.1) / 60)} minutes (less with cache hits)`
+    );
+    console.log();
+
+    const aeds = await prisma.aed.findMany({
+      where,
+      select: {
+        id: true,
+        name: true,
+        latitude: true,
+        longitude: true,
+        country_code: true,
+        location: {
+          select: {
+            id: true,
+            city_name: true,
+            city_code: true,
+            postal_code: true,
+            admin_level_1: true,
+          },
+        },
+      },
+      // id breaks created_at ties so --offset/--limit windows are stable across runs
+      orderBy: [{ created_at: "asc" }, { id: "asc" }],
+      skip: opts.offset,
+      take: processCount,
+    });
+
+    let enriched = 0;
+    let skipped = 0;
+    let failed = 0;
+    let mismatches = 0;
+    let cacheHits = 0;
+    let nullIslands = 0;
+    let wrongCountries = 0;
+    const mismatchList: Mismatch[] = [];
+    const wrongCountryList: WrongCountry[] = [];
+
+    for (const [i, aed] of aeds.entries()) {
+      // Non-null: the query above filters out rows without coordinates
+      const lat = aed.latitude!;
+      const lon = aed.longitude!;
+      const location = aed.location;
+
+      if (!location) {
+        skipped++;
+        continue;
+      }
+
+      // Skip Null Island (0,0) — clearly invalid coordinates
+      if (isNullIsland(lat, lon)) {
+        nullIslands++;
+        skipped++;
+        // Still mark as verified so we don't reprocess
+        if (!opts.dryRun) {
+          await prisma.aedLocation.update({
+            where: { id: location.id },
+            data: { nominatim_verified_at: new Date() },
+          });
+        }
+        continue;
+      }
+
+      // Progress
+      if ((i + 1) % 100 === 0 || i === 0) {
+        const pct = (((i + 1) / aeds.length) * 100).toFixed(1);
+        const elapsed = Math.round((Date.now() - startTime) / 1000);
+        console.log(
+          `[${pct}%] Processing ${i + 1}/${aeds.length} (enriched: ${enriched}, failed: ${failed}, wrong_country: ${wrongCountries}, cache: ${cacheHits}, elapsed: ${elapsed}s)`
+        );
+      }
+
+      // Check cache
+      const key = coordKey(lat, lon);
+      if (geocodeCache.has(key)) cacheHits++;
+
+      const result = await cachedReverseGeocode(lat, lon);
+
+      if (!result) {
+        failed++;
+        continue;
+      }
+
+      // Country mismatch check — coordinates don't match stored country
+      if (
+        aed.country_code &&
+        result.countryCode &&
+        aed.country_code.toUpperCase() !== result.countryCode.toUpperCase()
+      ) {
+        wrongCountries++;
+        wrongCountryList.push({
+          aedId: aed.id,
+          name: aed.name,
+          cityName: location.city_name || null,
+          storedCountry: aed.country_code,
+          nominatimCountry: result.countryCode,
+          lat,
+          lon,
+        });
+        // Mark as verified but DON'T update admin_level_1 — data is geographically incoherent
+        if (!opts.dryRun) {
+          await prisma.aedLocation.update({
+            where: { id: location.id },
+            data: { nominatim_verified_at: new Date() },
+          });
+        }
+        skipped++;
+        continue;
+      }
+
+      // Check for city_code mismatches (Spain only)
+      const currentAdmin = location.admin_level_1;
+      const currentCityCode = location.city_code;
+      const currentPostal = location.postal_code;
+
+      let cityCodeMismatch = false;
+      if (result.countryCode?.toUpperCase() === "ES" && currentCityCode && result.postalCode) {
+        const currentPrefix = currentCityCode.substring(0, 2);
+        const nominatimPrefix = result.postalCode.substring(0, 2);
+        if (currentPrefix !== nominatimPrefix) {
+          cityCodeMismatch = true;
+          mismatches++;
+          mismatchList.push({
+            aedId: aed.id,
+            cityName: location.city_name || null,
+            currentAdmin,
+            nominatimAdmin: result.adminLevel1,
+            currentCityCode,
+            nominatimPostalCode: result.postalCode,
+            lat,
+            lon,
+          });
+        }
+      }
+
+      // Build update
+      const update: LocationUpdate = {
+        nominatim_verified_at: new Date(),
+      };
+
+      if (result.adminLevel1) {
+        update.admin_level_1 = result.adminLevel1;
+      }
+
+      if (opts.fixCityCode && cityCodeMismatch && currentCityCode && result.postalCode) {
+        const newPrefix = result.postalCode.substring(0, 2);
+        const oldSuffix = currentCityCode.substring(2);
+        update.city_code = newPrefix + oldSuffix;
+      }
+
+      // Fill missing postal_code
+      if (!currentPostal && result.postalCode) {
+        update.postal_code = result.postalCode;
+      }
+
+      if (!opts.dryRun) {
+        await prisma.aedLocation.update({
+          where: { id: location.id },
+          data: update,
+        });
+      }
+
+      enriched++;
+    }
+
+    console.log();
+    console.log("=== Results ===");
+    console.log(`Processed: ${aeds.length}`);
+    console.log(`Enriched: ${enriched}`);
+    console.log(`Skipped: ${skipped}`);
+    console.log(`Failed: ${failed}`);
+    console.log(`Cache hits: ${cacheHits}`);
+    console.log(`Null Islands: ${nullIslands}`);
+    console.log(`Mismatches: ${mismatches} (city_code prefix != Nominatim postal)`);
+    console.log(`Wrong country: ${wrongCountries} (coordinates outside stored country — excluded)`);
+    console.log(`Time: ${Math.round((Date.now() - startTime) / 1000)}s`);
+
+    if (opts.dryRun && enriched > 0) {
+      console.log();
+      console.log("This was a DRY RUN. Run with --write to apply changes.");
+    }
+
+    // Reports
+    if (opts.report) {
+      const fs = await import("fs");
+      const date = new Date().toISOString().slice(0, 10);
+
+      if (mismatchList.length > 0) {
+        const csv = [
+          "aed_id,city_name,current_admin,nominatim_admin,current_city_code,nominatim_postal,lat,lon",
+          ...mismatchList.map((m) =>
+            [
+              m.aedId,
+              csvQuoted(m.cityName),
+              csvQuoted(m.currentAdmin),
+              csvQuoted(m.nominatimAdmin),
+              m.currentCityCode,
+              m.nominatimPostalCode,
+              m.lat,
+              m.lon,
+            ].join(",")
+          ),
+        ].join("\n");
+        const path = `geographic-mismatches-${date}.csv`;
+        fs.writeFileSync(path, csv);
+        console.log(`\nMismatch report: ${path}`);
+      }
+
+      if (wrongCountryList.length > 0) {
+        const csv = [
+          "aed_id,name,city_name,stored_country,nominatim_country,lat,lon",
+          ...wrongCountryList.map((w) =>
+            [
+              w.aedId,
+              csvQuoted(w.name),
+              csvQuoted(w.cityName),
+              w.storedCountry,
+              w.nominatimCountry,
+              w.lat,
+              w.lon,
+            ].join(",")
+          ),
+        ].join("\n");
+        const path = `wrong-country-${date}.csv`;
+        fs.writeFileSync(path, csv);
+        console.log(`Wrong country report: ${path}`);
+      }
+    }
+
+    if (wrongCountryList.length > 0) {
+      console.log();
+      console.log(`=== Wrong Country (${wrongCountryList.length} total, showing first 10) ===`);
+      for (const w of wrongCountryList.slice(0, 10)) {
+        console.log(
+          ` ${w.aedId}: "${w.name}" stored=${w.storedCountry} actual=${w.nominatimCountry} (${w.lat}, ${w.lon})`
+        );
+      }
+    }
+  } finally {
+    await prisma.$disconnect();
+  }
+}
+
+main().catch((err) => {
+  console.error("Fatal error:", err);
+  process.exit(1);
+});
diff --git a/src/app/locations/[country]/[region]/[city]/page.tsx b/src/app/locations/[country]/[region]/[city]/page.tsx
index be1af479..b1ca0626 100644
--- a/src/app/locations/[country]/[region]/[city]/page.tsx
+++ b/src/app/locations/[country]/[region]/[city]/page.tsx
@@ -29,15 +29,12 @@ interface Props {
 
 const resolveCityName = cache(async (citySlug: string): Promise => {
   const approxName =
slugToApproxCityName(citySlug); + // Exact match only (case-insensitive). No `contains` — it can match + // wrong cities (e.g., slug "tor" matching "Torrevieja" instead of "Tor"). const match = await prisma.aed.findFirst({ where: { ...PUBLISHED_AED_WHERE, - location: { - OR: [ - { city_name: { equals: approxName, mode: "insensitive" } }, - { city_name: { contains: approxName, mode: "insensitive" } }, - ], - }, + location: { city_name: { equals: approxName, mode: "insensitive" } }, }, include: { location: { select: { city_name: true } } }, }); diff --git a/src/app/locations/[country]/[region]/page.tsx b/src/app/locations/[country]/[region]/page.tsx index 8af0b385..e3662be2 100644 --- a/src/app/locations/[country]/[region]/page.tsx +++ b/src/app/locations/[country]/[region]/page.tsx @@ -27,25 +27,33 @@ interface CityInRegion { count: number; } -const getRegionCities = cache(async (ineCodes: string[]): Promise => { - try { - return (await prisma.$queryRaw` - SELECT l.city_name, COUNT(*)::int as "count" - FROM aeds a - JOIN aed_locations l ON l.id = a.location_id - WHERE a.status = 'PUBLISHED' - AND a.publication_mode != 'NONE' - AND l.city_name IS NOT NULL - AND l.city_name != '' - AND l.city_code IS NOT NULL - AND LEFT(l.city_code, 2) = ANY(${ineCodes}) - GROUP BY l.city_name - ORDER BY COUNT(*) DESC - `) as CityInRegion[]; - } catch { - return []; +/** + * Get cities in a region using admin_level_1 as primary source, + * falling back to INE province codes for records not yet enriched. 
+ */ +const getRegionCities = cache( + async (communityName: string, ineCodes: string[]): Promise => { + try { + return (await prisma.$queryRaw` + SELECT l.city_name, COUNT(*)::int as "count" + FROM aeds a + JOIN aed_locations l ON l.id = a.location_id + WHERE a.status = 'PUBLISHED' + AND a.publication_mode != 'NONE' + AND l.city_name IS NOT NULL + AND l.city_name != '' + AND ( + l.admin_level_1 = ${communityName} + OR (l.admin_level_1 IS NULL AND COALESCE(LEFT(NULLIF(l.city_code, ''), 2), LEFT(NULLIF(l.postal_code, ''), 2)) = ANY(${ineCodes})) + ) + GROUP BY l.city_name + ORDER BY COUNT(*) DESC + `) as CityInRegion[]; + } catch { + return []; + } } -}); +); export async function generateStaticParams() { return COMMUNITIES.map((c) => ({ country: "spain", region: c.slug })); @@ -58,7 +66,7 @@ export async function generateMetadata({ params }: Props): Promise { if (!country || !community) return { title: "Región no encontrada | DeaMap" }; - const cities = await getRegionCities(community.provinceIneCodes); + const cities = await getRegionCities(community.name, community.provinceIneCodes); const totalCount = cities.reduce((sum, c) => sum + c.count, 0); const title = `Desfibriladores en ${community.name} — ${totalCount.toLocaleString("es-ES")} DEAs en ${cities.length} ciudades`; @@ -87,7 +95,7 @@ export default async function RegionPage({ params }: Props) { if (!country || !community) notFound(); - const cities = await getRegionCities(community.provinceIneCodes); + const cities = await getRegionCities(community.name, community.provinceIneCodes); const totalCount = cities.reduce((sum, c) => sum + c.count, 0); const avg = cities.length > 0 ? 
Math.round(totalCount / cities.length) : 0; diff --git a/src/app/locations/[country]/page.tsx b/src/app/locations/[country]/page.tsx index b7dac244..af36ba3c 100644 --- a/src/app/locations/[country]/page.tsx +++ b/src/app/locations/[country]/page.tsx @@ -1,17 +1,17 @@ import { Globe, Heart, MapPin, ArrowRight, Building2 } from "lucide-react"; import type { Metadata } from "next"; import Link from "next/link"; -import { notFound, redirect } from "next/navigation"; +import { notFound, permanentRedirect } from "next/navigation"; import { PUBLISHED_AED_WHERE } from "@/lib/aed-status"; import { prisma } from "@/lib/db"; import { - COMMUNITIES, COUNTRY_BY_SLUG, - communityForIneCode, communityPath, cityPath, slugToApproxCityName, + resolveRegionSlug, + resolveRegionName, } from "@/lib/geography"; import { safeJsonLd } from "@/lib/json-ld"; @@ -36,35 +36,52 @@ async function getCountryStats(countryCode: string): Promise<{ }> { try { const raw = (await prisma.$queryRaw` - SELECT LEFT(l.city_code, 2) as "ine_code", l.city_name, COUNT(*)::int as "aed_count" + SELECT l.admin_level_1, + COALESCE(LEFT(NULLIF(l.city_code, ''), 2), LEFT(NULLIF(l.postal_code, ''), 2)) as "ine_code", + l.city_name, + COUNT(*)::int as "aed_count" FROM aeds a JOIN aed_locations l ON l.id = a.location_id WHERE a.status = 'PUBLISHED' AND a.publication_mode != 'NONE' AND a.country_code = ${countryCode} - AND l.city_code IS NOT NULL - AND l.city_code != '' AND l.city_name IS NOT NULL AND l.city_name != '' - GROUP BY LEFT(l.city_code, 2), l.city_name - `) as { ine_code: string; city_name: string; aed_count: number }[]; + AND (l.admin_level_1 IS NOT NULL OR COALESCE(NULLIF(l.city_code, ''), NULLIF(l.postal_code, '')) IS NOT NULL) + GROUP BY l.admin_level_1, COALESCE(LEFT(NULLIF(l.city_code, ''), 2), LEFT(NULLIF(l.postal_code, ''), 2)), l.city_name + `) as { + admin_level_1: string | null; + ine_code: string | null; + city_name: string; + aed_count: number; + }[]; - const communityAgg = new Map }>(); + 
const communityAgg = new Map< + string, + { name: string; totalAeds: number; cities: Set } + >(); for (const row of raw) { - const community = communityForIneCode(row.ine_code); - if (!community) continue; - const existing = communityAgg.get(community.slug) || { totalAeds: 0, cities: new Set() }; + const regionSlug = resolveRegionSlug(row.admin_level_1, row.ine_code); + const regionName = resolveRegionName(row.admin_level_1, row.ine_code); + if (!regionSlug || !regionName) continue; + const existing = communityAgg.get(regionSlug) || { + name: regionName, + totalAeds: 0, + cities: new Set(), + }; existing.totalAeds += row.aed_count; existing.cities.add(row.city_name); - communityAgg.set(community.slug, existing); + communityAgg.set(regionSlug, existing); } - const communities: CommunityStats[] = COMMUNITIES.filter((c) => communityAgg.has(c.slug)) - .map((c) => { - const agg = communityAgg.get(c.slug)!; - return { name: c.name, slug: c.slug, totalAeds: agg.totalAeds, cityCount: agg.cities.size }; - }) + const communities: CommunityStats[] = [...communityAgg.entries()] + .map(([slug, agg]) => ({ + name: agg.name, + slug, + totalAeds: agg.totalAeds, + cityCount: agg.cities.size, + })) .sort((a, b) => b.totalAeds - a.totalAeds); const totalAeds = communities.reduce((sum, c) => sum + c.totalAeds, 0); @@ -113,21 +130,19 @@ async function tryLegacyCityRedirect(citySlug: string): Promise { const aed = await prisma.aed.findFirst({ where: { ...PUBLISHED_AED_WHERE, - location: { - OR: [ - { city_name: { equals: cityName, mode: "insensitive" } }, - { city_name: { contains: cityName, mode: "insensitive" } }, - ], - }, + location: { city_name: { equals: cityName, mode: "insensitive" } }, }, - include: { location: { select: { city_name: true, city_code: true } } }, + include: { location: { select: { city_name: true, city_code: true, admin_level_1: true } } }, }); - if (aed?.location?.city_code && aed.location.city_name) { - const ineCode = aed.location.city_code.substring(0, 2); - 
const community = communityForIneCode(ineCode); - if (community) { - redirect(cityPath("spain", community.slug, aed.location.city_name)); + if (aed?.location?.city_name) { + // Resolve region: admin_level_1 (Nominatim) → city_code INE fallback + const regionSlug = resolveRegionSlug( + aed.location.admin_level_1 ?? null, + aed.location.city_code?.substring(0, 2) ?? null + ); + if (regionSlug) { + permanentRedirect(cityPath("spain", regionSlug, aed.location.city_name)); } } diff --git a/src/app/locations/provincia/[province]/page.tsx b/src/app/locations/provincia/[province]/page.tsx index 53f896a4..f9fbab15 100644 --- a/src/app/locations/provincia/[province]/page.tsx +++ b/src/app/locations/provincia/[province]/page.tsx @@ -1,11 +1,11 @@ -import { notFound, redirect } from "next/navigation"; +import { notFound, permanentRedirect } from "next/navigation"; import { communityForIneCode, communityPath } from "@/lib/geography"; import { PROVINCE_BY_SLUG } from "@/lib/provinces"; /** * Legacy redirect: /locations/provincia/[province] → /locations/spain/[community] - * Maps province slug to its community and issues a 301 redirect. + * Maps province slug to its community and issues a 308 permanent redirect. 
*/ interface Props { @@ -21,5 +21,5 @@ export default async function LegacyProvinceRedirect({ params }: Props) { const community = communityForIneCode(province.ineCode); if (!community) notFound(); - redirect(communityPath("spain", community)); + permanentRedirect(communityPath("spain", community)); } diff --git a/src/app/sitemap.ts b/src/app/sitemap.ts index 505e9964..fa989663 100644 --- a/src/app/sitemap.ts +++ b/src/app/sitemap.ts @@ -3,13 +3,17 @@ import type { MetadataRoute } from "next"; import { prisma } from "@/lib/db"; import { COMMUNITIES, - communityForIneCode, + COMMUNITY_BY_SLUG, communityPath, countryPath, cityPath, + resolveRegionSlug, } from "@/lib/geography"; import { GUIDE_SLUGS } from "@/lib/guides"; +/** Google sitemap limit: max 50,000 URLs per file */ +const MAX_SITEMAP_URLS = 50_000; + export default async function sitemap(): Promise { const baseUrl = "https://deamap.es"; @@ -30,26 +34,34 @@ export default async function sitemap(): Promise { ]; try { - // Get all cities with their province INE codes + // Get all cities with their region info + // Priority: admin_level_1 (Nominatim) → city_code/postal_code prefix (INE fallback) const cities = (await prisma.$queryRaw` - SELECT l.city_name, LEFT(l.city_code, 2) as "ine_code", COUNT(*)::int as "count" + SELECT l.city_name, + l.admin_level_1, + COALESCE(LEFT(NULLIF(l.city_code, ''), 2), LEFT(NULLIF(l.postal_code, ''), 2)) as "ine_code", + COUNT(*)::int as "count" FROM aeds a JOIN aed_locations l ON l.id = a.location_id WHERE a.status = 'PUBLISHED' AND a.publication_mode != 'NONE' AND l.city_name IS NOT NULL AND l.city_name != '' - AND l.city_code IS NOT NULL - AND l.city_code != '' - GROUP BY l.city_name, LEFT(l.city_code, 2) - ORDER BY l.city_name - `) as { city_name: string; ine_code: string; count: number }[]; + AND (l.admin_level_1 IS NOT NULL OR COALESCE(NULLIF(l.city_code, ''), NULLIF(l.postal_code, '')) IS NOT NULL) + GROUP BY l.city_name, l.admin_level_1, COALESCE(LEFT(NULLIF(l.city_code, 
''), 2), LEFT(NULLIF(l.postal_code, ''), 2)) + ORDER BY "count" DESC + `) as { + city_name: string; + admin_level_1: string | null; + ine_code: string | null; + count: number; + }[]; // Community pages (only those with data) const communitiesWithData = new Set(); for (const city of cities) { - const community = communityForIneCode(city.ine_code); - if (community) communitiesWithData.add(community.slug); + const regionSlug = resolveRegionSlug(city.admin_level_1, city.ine_code); + if (regionSlug) communitiesWithData.add(regionSlug); } const communityPages: MetadataRoute.Sitemap = COMMUNITIES.filter((c) => @@ -61,13 +73,22 @@ export default async function sitemap(): Promise { priority: 0.85, })); - // City pages + // City pages — deduplicate by URL to avoid duplicates from mixed admin_level_1/INE data const cityPages: MetadataRoute.Sitemap = []; - for (const { city_name, ine_code } of cities) { - const community = communityForIneCode(ine_code); - if (!community) continue; + const seenCityUrls = new Set(); + const budget = + MAX_SITEMAP_URLS - staticPages.length - communityPages.length - GUIDE_SLUGS.length; + + for (const { city_name, admin_level_1, ine_code } of cities) { + if (cityPages.length >= budget) break; + const regionSlug = resolveRegionSlug(admin_level_1, ine_code); + // Only emit URLs for known communities to avoid 404s from foreign/unrecognized regions + if (!regionSlug || !COMMUNITY_BY_SLUG.has(regionSlug)) continue; + const url = `${baseUrl}${cityPath("spain", regionSlug, city_name)}`; + if (seenCityUrls.has(url)) continue; + seenCityUrls.add(url); cityPages.push({ - url: `${baseUrl}${cityPath("spain", community.slug, city_name)}`, + url, lastModified: new Date(), changeFrequency: "weekly" as const, priority: 0.8, @@ -83,7 +104,8 @@ export default async function sitemap(): Promise { })); return [...staticPages, ...communityPages, ...cityPages, ...guidePages]; - } catch { + } catch (err) { + console.error("[sitemap] Failed to generate dynamic pages:", 
err); return staticPages; } } diff --git a/src/batch/application/processors/AedCsvImportProcessor.ts b/src/batch/application/processors/AedCsvImportProcessor.ts index fed2a553..92b7fbc0 100644 --- a/src/batch/application/processors/AedCsvImportProcessor.ts +++ b/src/batch/application/processors/AedCsvImportProcessor.ts @@ -24,6 +24,7 @@ import { DownloadAndUploadImageUseCase } from "@/storage/application/use-cases/D import * as os from "os"; import * as path from "path"; import { invalidateAllAedCaches } from "@/lib/cache-invalidation"; +import { reverseGeocode } from "@/lib/nominatim"; interface CsvRecord { [key: string]: string; @@ -615,6 +616,17 @@ export class AedCsvImportProcessor extends BaseBatchJobProcessor { // 1. Location const location = await tx.aedLocation.create({ @@ -268,6 +280,7 @@ async function createAed( access_instructions: toStringOrNull(data.accessDescription), city_name: toStringOrNull(data.city), city_code: toStringOrNull(data.cityCode), + admin_level_1: adminLevel1, district_name: toStringOrNull(data.district), }, }); @@ -598,6 +611,24 @@ async function updateAed( ): Promise { const { dataSourceId, sourceOrigin, externalId } = opts; + // Reverse geocode OUTSIDE transaction (network I/O) + // Skip if coordinates haven't changed — avoids redundant Nominatim calls on repeated syncs + let updateAdminLevel1: string | null = null; + const updateLat = parseCoordinate(data.latitude); + const updateLon = parseCoordinate(data.longitude); + const coordsChanged = + updateLat != null && + updateLon != null && + (updateLat !== Number(aed.latitude) || updateLon !== Number(aed.longitude)); + if (coordsChanged) { + try { + const geo = await reverseGeocode(updateLat!, updateLon!, 1); + if (geo) updateAdminLevel1 = geo.adminLevel1; + } catch { + // Non-blocking + } + } + await prisma.$transaction(async (tx) => { // ============================== // TIER 1: VERIFIED AED PROTECTION @@ -646,6 +677,7 @@ async function updateAed( access_instructions: 
// --- Lookup helpers ---

/**
 * Normalize a string for accent- and case-insensitive matching.
 *
 * The input is trimmed, inner whitespace runs are collapsed to a single space,
 * and the result is lowercased, decomposed to NFD and stripped of the combining
 * marks in U+0300–U+036F. "Cataluña", " cataluna " and "CATALUÑA" therefore all
 * produce the same lookup key.
 */
function normalizeForMatch(s: string): string {
  return s
    .trim()
    .replace(/\s+/g, " ")
    .toLowerCase()
    .normalize("NFD")
    .replace(/[\u0300-\u036f]/g, "");
}

/**
 * Alias → canonical community name, used to widen the name lookup below.
 *
 * Contains English names that Nominatim may return depending on the
 * accept-language header or data quality. An alias is only registered when its
 * canonical name matches an entry of COMMUNITIES (after normalization).
 */
const COMMUNITY_ALIASES: Record<string, string> = {
  "community of madrid": "Comunidad de Madrid",
  catalonia: "Cataluña",
  "valencian community": "Comunitat Valenciana",
  "basque country": "País Vasco",
  "balearic islands": "Illes Balears",
  "canary islands": "Canarias",
  "region of murcia": "Región de Murcia",
  "castile and leon": "Castilla y León",
  "castile-la mancha": "Castilla-La Mancha",
  "principality of asturias": "Asturias",
  "chartered community of navarre": "Navarra",
  navarre: "Navarra",
  aragon: "Aragón",
  andalusia: "Andalucía",
  extremadura: "Extremadura",
  galicia: "Galicia",
  cantabria: "Cantabria",
  "la rioja": "La Rioja",
  ceuta: "Ceuta",
  melilla: "Melilla",
};

/**
 * Normalized community name (or alias) → Community.
 * Canonical names are inserted first; aliases are layered on top afterwards.
 */
const COMMUNITY_BY_NAME = new Map<string, Community>();
for (const c of COMMUNITIES) {
  COMMUNITY_BY_NAME.set(normalizeForMatch(c.name), c);
}
for (const [alias, canonical] of Object.entries(COMMUNITY_ALIASES)) {
  const community = COMMUNITY_BY_NAME.get(normalizeForMatch(canonical));
  // Silently skip aliases whose canonical name is not present in COMMUNITIES.
  if (community) COMMUNITY_BY_NAME.set(normalizeForMatch(alias), community);
}

/** O(1) lookup: INE province code → Community */
const COMMUNITY_BY_INE_CODE = new Map<string, Community>();
for (const c of COMMUNITIES) {
  for (const ineCode of c.provinceIneCodes) {
    COMMUNITY_BY_INE_CODE.set(ineCode, c);
  }
}

/** Given a province INE code, find which community it belongs to */
export function communityForIneCode(ineCode: string): Community | undefined {
  return COMMUNITY_BY_INE_CODE.get(ineCode);
}

/**
 * Given an admin_level_1 name (from Nominatim), find the matching community.
 *
 * Matching ignores case, accents and surrounding/repeated whitespace. Only
 * names present in COMMUNITIES or COMMUNITY_ALIASES can match; anything else
 * returns undefined (use regionSlugFromAdminLevel1 for those).
 */
export function communityForAdminLevel1(adminLevel1: string): Community | undefined {
  return COMMUNITY_BY_NAME.get(normalizeForMatch(adminLevel1));
}

/**
 * Generate a URL-safe slug from any admin_level_1 name.
 * Used for regions without an entry in the static community map.
 *
 * NOTE(review): the result is whatever toSlug returns, which may be an empty
 * string for inputs it cannot slugify — confirm against toSlug.
 */
export function regionSlugFromAdminLevel1(adminLevel1: string): string {
  return toSlug(adminLevel1);
}

/**
 * Resolve a region slug from admin_level_1 OR ine_code, with fallback chain.
 *
 * Priority: known community for admin_level_1 → dynamic slug of admin_level_1
 * → community for ine_code → null.
 *
 * @param adminLevel1 First-level administrative division name, or null.
 * @param ineCode Province INE code, or null.
 * @returns A non-empty slug, or null when neither input resolves to one.
 */
export function resolveRegionSlug(
  adminLevel1: string | null,
  ineCode: string | null
): string | null {
  // Treat whitespace-only names as missing so they cannot shadow the INE code.
  const regionName = adminLevel1 ? adminLevel1.trim() : "";
  if (regionName) {
    const community = communityForAdminLevel1(regionName);
    if (community) return community.slug;
    // Unknown region: generate the slug dynamically. An empty slug is never a
    // usable path segment, so fall through to the INE code in that case.
    const dynamicSlug = regionSlugFromAdminLevel1(regionName);
    if (dynamicSlug) return dynamicSlug;
  }
  if (ineCode) {
    const community = communityForIneCode(ineCode);
    if (community) return community.slug;
  }
  return null;
}

/**
 * Resolve a region display name from admin_level_1 OR ine_code.
 *
 * Priority: canonical community name for admin_level_1 → trimmed admin_level_1
 * as given → community name for ine_code → null.
 *
 * @param adminLevel1 First-level administrative division name, or null.
 * @param ineCode Province INE code, or null.
 * @returns A non-empty display name, or null when neither input resolves.
 */
export function resolveRegionName(
  adminLevel1: string | null,
  ineCode: string | null
): string | null {
  // Whitespace-only names carry no information; fall back to the INE code.
  const regionName = adminLevel1 ? adminLevel1.trim() : "";
  if (regionName) {
    const community = communityForAdminLevel1(regionName);
    // Use the raw (trimmed) name for regions outside the static community map.
    return community ? community.name : regionName;
  }
  if (ineCode) {
    const community = communityForIneCode(ineCode);
    if (community) return community.name;
  }
  return null;
}