diff --git a/package-lock.json b/package-lock.json index d9fd2f1..17fb8e4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,7 @@ "express": "^4.18.2", "express-rate-limit": "^6.7.0", "form-data": "^4.0.5", + "https-proxy-agent": "^7.0.6", "json-to-pretty-yaml": "^1.2.2", "json-xml-parse": "^1.2.8", "morgan": "^1.10.0", diff --git a/package.json b/package.json index f756e60..af9eb50 100644 --- a/package.json +++ b/package.json @@ -34,6 +34,7 @@ "express": "^4.18.2", "express-rate-limit": "^6.7.0", "form-data": "^4.0.5", + "https-proxy-agent": "^7.0.6", "json-to-pretty-yaml": "^1.2.2", "json-xml-parse": "^1.2.8", "morgan": "^1.10.0", diff --git a/src/routes/v2/peruserver/trucky/live-jobs/index.js b/src/routes/v2/peruserver/trucky/live-jobs/index.js index 28854a5..2353192 100644 --- a/src/routes/v2/peruserver/trucky/live-jobs/index.js +++ b/src/routes/v2/peruserver/trucky/live-jobs/index.js @@ -1,9 +1,15 @@ const { Router } = require('express'); const axios = require('axios'); +const { + startPeriodicProxyUpdate, + getRandomProxy, + getCachedProxies, +} = require('../../../../../utils/proxy-manager'); const router = Router(); const TRUCKY_BASE_URL = 'https://e.truckyapp.com/api/v1/company'; +const PERUSERVER_COMPANIES_URL = 'https://peruserver.pe/wp-json/psv/v1/companies'; const TRUCKY_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)', Accept: 'application/json, text/plain, */*', @@ -17,9 +23,14 @@ const DEFAULT_DAYS = 3; const MAX_JOBS = 300; const DEFAULT_PER_PAGE = 25; const MAX_PER_PAGE = 100; -const COMPANY_BATCH_SIZE = 6; +const DEFAULT_COMPANY_BATCH_SIZE = Math.min( + 6, + Math.max(1, Number.parseInt(process.env.TRUCKY_CONCURRENCY || '4', 10) || 4) +); const SNAPSHOT_TTL_MS = 45 * 1000; const SNAPSHOT_DB_MAX_AGE_MS = 90 * 1000; +const TRUCKY_MAX_RETRIES = Math.max(1, Number.parseInt(process.env.TRUCKY_MAX_RETRIES || '3', 10) || 3); +const TRUCKY_RETRY_BASE_MS = Math.max(200, Number.parseInt(process.env.TRUCKY_RETRY_BASE_MS 
|| '700', 10) || 700); const snapshotCache = new Map(); @@ -30,6 +41,8 @@ const SUPABASE_ANON_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY || ''; +startPeriodicProxyUpdate(); + const sanitizeCompanyName = (name) => { if (typeof name !== 'string') return ''; return name.replace(/\s+/g, ' ').trim(); @@ -112,29 +125,115 @@ const parseCsvNumberSet = (rawValue) => { ); }; -const getSnapshotCacheKey = (days) => `days-${days}`; +const parseProxyForAxios = (rawProxy) => { + if (!rawProxy) return null; + + try { + const withProtocol = /^(http|https):\/\//i.test(rawProxy) + ? rawProxy + : `http://${rawProxy}`; + const url = new URL(withProtocol); + + return { + protocol: url.protocol.replace(':', ''), + host: url.hostname, + port: Number(url.port || (url.protocol === 'https:' ? 443 : 80)), + auth: url.username + ? { + username: decodeURIComponent(url.username), + password: decodeURIComponent(url.password || ''), + } + : undefined, + }; + } catch (error) { + return null; + } +}; + +const getTruckyProxyCandidates = (query) => { + const queryProxyRaw = query && query.truckyProxy ? String(query.truckyProxy).trim() : ''; + const queryProxyListRaw = query && query.truckyProxyList ? String(query.truckyProxyList).trim() : ''; + + const queryProxies = queryProxyListRaw + ? queryProxyListRaw.split(',').map((item) => item.trim()).filter(Boolean) + : []; + + if (queryProxyRaw) { + queryProxies.unshift(queryProxyRaw); + } + + return queryProxies.filter(Boolean); +}; + +const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + +const isRetryableTruckyStatus = (status) => [403, 429, 500, 502, 503, 504].includes(status); + +const truckyRequestWithRetry = async ({ url, params, timeout = 12000, proxyCandidates = [], useProxyPool = true }) => { + let lastError = null; + + for (let attempt = 0; attempt < TRUCKY_MAX_RETRIES; attempt += 1) { + const poolProxy = useProxyPool && proxyCandidates.length === 0 ? getRandomProxy() : null; + const proxyRaw = proxyCandidates.length > 0 + ? 
proxyCandidates[attempt % proxyCandidates.length] + : poolProxy; + const proxy = parseProxyForAxios(proxyRaw); + + try { + const response = await axios.get(url, { + params, + headers: TRUCKY_HEADERS, + timeout, + proxy: proxy || undefined, + }); + + return response; + } catch (error) { + lastError = error; + const status = Number(error && error.response && error.response.status); + const shouldRetry = isRetryableTruckyStatus(status); + + if (!shouldRetry || attempt === TRUCKY_MAX_RETRIES - 1) { + throw error; + } + + const waitMs = TRUCKY_RETRY_BASE_MS * (attempt + 1) + Math.floor(Math.random() * 250); + await sleep(waitMs); + } + } + + throw lastError || new Error('Error desconocido consultando Trucky'); +}; + +const normalizeCompaniesSource = (rawSource) => { + const source = String(rawSource || 'auto').trim().toLowerCase(); + if (['supabase', 'wordpress', 'auto'].includes(source)) return source; + return 'auto'; +}; + +const getSnapshotCacheKey = ({ days, companiesSource }) => `days-${days}-source-${companiesSource}`; -const getSnapshotFromMemory = (days) => { - const entry = snapshotCache.get(getSnapshotCacheKey(days)); +const getSnapshotFromMemory = ({ days, companiesSource }) => { + const entry = snapshotCache.get(getSnapshotCacheKey({ days, companiesSource })); if (!entry) return null; if (Date.now() >= entry.expiresAt) return null; return entry.payload; }; -const saveSnapshotInMemory = (days, payload) => { - snapshotCache.set(getSnapshotCacheKey(days), { +const saveSnapshotInMemory = ({ days, companiesSource }, payload) => { + snapshotCache.set(getSnapshotCacheKey({ days, companiesSource }), { payload, expiresAt: Date.now() + SNAPSHOT_TTL_MS, }); }; -const getSnapshotFromSupabase = async (days) => { +const getSnapshotFromSupabase = async ({ days, companiesSource }) => { const env = getSupabaseCacheEnv(); const readKey = (env && (env.anonKey || env.key)) || ''; if (!env || !readKey) return null; try { - const cacheKey = getSnapshotCacheKey(days); + const 
cacheKey = getSnapshotCacheKey({ days, companiesSource }); const response = await axios.get( `${env.url}/rest/v1/trucky_live_jobs_snapshots?select=payload,updated_at&cache_key=eq.${encodeURIComponent(cacheKey)}&limit=1`, { @@ -165,7 +264,7 @@ const getSnapshotFromSupabase = async (days) => { } }; -const saveSnapshotInSupabase = async (days, payload) => { +const saveSnapshotInSupabase = async ({ days, companiesSource }, payload) => { const env = getSupabaseCacheEnv(); if (!env || !env.serviceRoleKey) return; @@ -173,7 +272,7 @@ const saveSnapshotInSupabase = async (days, payload) => { await axios.post( `${env.url}/rest/v1/trucky_live_jobs_snapshots`, [{ - cache_key: getSnapshotCacheKey(days), + cache_key: getSnapshotCacheKey({ days, companiesSource }), payload, updated_at: new Date().toISOString(), }], @@ -473,7 +572,7 @@ const upsertUnresolvedPoints = async (points) => { } }; -const fetchRegisteredCompanies = async () => { +const fetchRegisteredCompaniesFromSupabase = async () => { if (!SUPABASE_URL || !SUPABASE_ANON_KEY) { return []; } @@ -503,8 +602,71 @@ const fetchRegisteredCompanies = async () => { } }; -const fetchCompanyJobs = async (company, cutoffMs) => { - const response = await axios.get(`${TRUCKY_BASE_URL}/${company.id}/jobs`, { +const fetchRegisteredCompaniesFromWordPress = async () => { + try { + const response = await axios.get(PERUSERVER_COMPANIES_URL, { + timeout: 15000, + }); + + const companies = Array.isArray(response.data) ? 
response.data : []; + + return companies + .map((company) => { + if (Number.isFinite(company)) { + return { + id: Number(company), + name: `Empresa ${company}`, + }; + } + + const id = Number(company && (company.id || company.company_id || company.empresaId)); + if (!Number.isFinite(id) || id <= 0) return null; + + return { + id, + name: sanitizeCompanyName(company.name || company.company_name) || `Empresa ${id}`, + }; + }) + .filter((row) => row && Number.isFinite(row.id) && row.id > 0); + } catch (error) { + return []; + } +}; + +const fetchRegisteredCompanies = async (companiesSource = 'auto') => { + const source = normalizeCompaniesSource(companiesSource); + + if (source === 'supabase') { + return { + companies: await fetchRegisteredCompaniesFromSupabase(), + sourceUsed: 'supabase', + }; + } + + if (source === 'wordpress') { + return { + companies: await fetchRegisteredCompaniesFromWordPress(), + sourceUsed: 'wordpress', + }; + } + + const supabaseCompanies = await fetchRegisteredCompaniesFromSupabase(); + if (supabaseCompanies.length > 0) { + return { + companies: supabaseCompanies, + sourceUsed: 'supabase', + }; + } + + return { + companies: await fetchRegisteredCompaniesFromWordPress(), + sourceUsed: 'wordpress', + }; +}; + +const fetchCompanyJobs = async (company, cutoffMs, proxyCandidates, useProxyPool) => { + const response = await truckyRequestWithRetry({ + url: `${TRUCKY_BASE_URL}/${company.id}/jobs`, params: { top: 0, page: 1, @@ -513,8 +675,9 @@ const fetchCompanyJobs = async (company, cutoffMs) => { sortingField: 'updated_at', sortingDirection: 'desc', }, - headers: TRUCKY_HEADERS, timeout: 12000, + proxyCandidates, + useProxyPool, }); const payload = response.data || {}; @@ -527,34 +690,26 @@ const fetchCompanyJobs = async (company, cutoffMs) => { .filter((job) => job.id > 0); }; -const buildCoreSnapshot = async (days) => { - const cutoffMs = Date.now() - days * 24 * 60 * 60 * 1000; - const payload = { - fetchedAt: new Date().toISOString(), - days, 
- companiesProcessed: 0, - jobs: [], - errors: [], - }; - - const companies = await fetchRegisteredCompanies(); - payload.companiesProcessed = companies.length; +const collectJobsFromCompanies = async (companies, cutoffMs, options = {}) => { + const batchSize = Math.max(1, Number.parseInt(String(options.companyBatchSize || DEFAULT_COMPANY_BATCH_SIZE), 10) || DEFAULT_COMPANY_BATCH_SIZE); + const proxyCandidates = Array.isArray(options.proxyCandidates) ? options.proxyCandidates : []; + const useProxyPool = options.useProxyPool !== false; + const jobs = []; + const errors = []; + let forbiddenCount = 0; - if (!companies.length) { - return payload; - } - - const allJobs = []; - - for (let index = 0; index < companies.length; index += COMPANY_BATCH_SIZE) { - const chunk = companies.slice(index, index + COMPANY_BATCH_SIZE); + for (let index = 0; index < companies.length; index += batchSize) { + const chunk = companies.slice(index, index + batchSize); const results = await Promise.all( chunk.map(async (company) => { try { - const jobs = await fetchCompanyJobs(company, cutoffMs); - return { companyId: company.id, jobs, error: null }; + const companyJobs = await fetchCompanyJobs(company, cutoffMs, proxyCandidates, useProxyPool); + return { companyId: company.id, jobs: companyJobs, error: null }; } catch (error) { + const statusCode = Number(error && error.response && error.response.status); + if (statusCode === 403) forbiddenCount += 1; + const message = error instanceof Error ? 
error.message : 'Error desconocido'; return { companyId: company.id, jobs: [], error: message }; } @@ -562,37 +717,110 @@ const buildCoreSnapshot = async (days) => { ); for (const result of results) { - allJobs.push(...result.jobs); + jobs.push(...result.jobs); if (result.error) { - payload.errors.push({ companyId: result.companyId, message: result.error }); + errors.push({ companyId: result.companyId, message: result.error }); } } } - payload.jobs = allJobs + return { + jobs, + errors, + forbiddenCount, + }; +}; + +const buildCoreSnapshot = async ({ days, companiesSource, companyBatchSize, proxyCandidates, useProxyPool }) => { + const cutoffMs = Date.now() - days * 24 * 60 * 60 * 1000; + const payload = { + fetchedAt: new Date().toISOString(), + days, + companiesSourceRequested: normalizeCompaniesSource(companiesSource), + companiesSourceUsed: 'unknown', + companiesProcessed: 0, + jobs: [], + errors: [], + }; + + const { companies, sourceUsed } = await fetchRegisteredCompanies(companiesSource); + payload.companiesSourceUsed = sourceUsed; + payload.companiesProcessed = companies.length; + + if (!companies.length) { + return payload; + } + + const initialResult = await collectJobsFromCompanies(companies, cutoffMs, { + companyBatchSize, + proxyCandidates, + useProxyPool, + }); + let jobs = initialResult.jobs; + let errors = initialResult.errors; + + const forbiddenRatio = companies.length > 0 + ? initialResult.forbiddenCount / companies.length + : 0; + + // Si en modo auto Supabase devuelve muchos 403, reintentar con WordPress en la misma petición. 
+ if ( + payload.companiesSourceRequested === 'auto' && + sourceUsed === 'supabase' && + jobs.length === 0 && + forbiddenRatio >= 0.7 + ) { + const wordpressCompanies = await fetchRegisteredCompaniesFromWordPress(); + + if (wordpressCompanies.length > 0) { + const fallbackResult = await collectJobsFromCompanies(wordpressCompanies, cutoffMs, { + companyBatchSize, + proxyCandidates, + useProxyPool, + }); + payload.companiesProcessed = wordpressCompanies.length; + payload.companiesSourceUsed = 'wordpress'; + jobs = fallbackResult.jobs; + errors = fallbackResult.errors; + } + } + + payload.jobs = jobs .sort((a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime()) .slice(0, MAX_JOBS); + payload.errors = errors; return payload; }; -const getCoreSnapshot = async (days, useDbCache) => { - const memorySnapshot = getSnapshotFromMemory(days); +const getCoreSnapshot = async ({ days, useDbCache, companiesSource, companyBatchSize, proxyCandidates, useProxyPool }) => { + const cacheContext = { + days, + companiesSource: normalizeCompaniesSource(companiesSource), + }; + + const memorySnapshot = getSnapshotFromMemory(cacheContext); if (memorySnapshot) return memorySnapshot; if (useDbCache) { - const dbSnapshot = await getSnapshotFromSupabase(days); + const dbSnapshot = await getSnapshotFromSupabase(cacheContext); if (dbSnapshot) { - saveSnapshotInMemory(days, dbSnapshot); + saveSnapshotInMemory(cacheContext, dbSnapshot); return dbSnapshot; } } - const freshSnapshot = await buildCoreSnapshot(days); - saveSnapshotInMemory(days, freshSnapshot); + const freshSnapshot = await buildCoreSnapshot({ + days, + companiesSource: cacheContext.companiesSource, + companyBatchSize, + proxyCandidates, + useProxyPool, + }); + saveSnapshotInMemory(cacheContext, freshSnapshot); if (useDbCache) { - await saveSnapshotInSupabase(days, freshSnapshot); + await saveSnapshotInSupabase(cacheContext, freshSnapshot); } return freshSnapshot; @@ -756,6 +984,9 @@ const parseRequestOptions = 
(query) => { days, page, perPage, + companiesSource: normalizeCompaniesSource(query.companiesSource), + companyBatchSize: parsePositiveInt(query.companyBatchSize, DEFAULT_COMPANY_BATCH_SIZE, 1, 10), + disableProxy: parseBoolean(query.disableProxy, false), fromJobId: query.fromJobId != null ? parsePositiveInt(query.fromJobId, null, 1, Number.MAX_SAFE_INTEGER) : null, compactJobs: parseBoolean(query.compactJobs, true), includePoints: parseBoolean(query.includePoints, false), @@ -774,7 +1005,17 @@ const parseRequestOptions = (query) => { router.get('/', async (req, res) => { try { const options = parseRequestOptions(req.query); - const snapshot = await getCoreSnapshot(options.days, options.useDbCache); + const proxyCandidates = options.disableProxy ? [] : getTruckyProxyCandidates(req.query); + const useProxyPool = !options.disableProxy; + const proxyPoolSize = useProxyPool ? getCachedProxies().length : 0; + const snapshot = await getCoreSnapshot({ + days: options.days, + useDbCache: options.useDbCache, + companiesSource: options.companiesSource, + companyBatchSize: options.companyBatchSize, + proxyCandidates, + useProxyPool, + }); const paginated = options.fromJobId != null ? 
paginateJobsFromId(snapshot.jobs, options.fromJobId, options.perPage) : paginateJobs(snapshot.jobs, options.page, options.perPage); @@ -804,6 +1045,14 @@ router.get('/', async (req, res) => { const responsePayload = { fetchedAt: snapshot.fetchedAt, days: options.days, + companiesSourceRequested: options.companiesSource, + companiesSourceUsed: snapshot.companiesSourceUsed || options.companiesSource, + truckyRequestConfig: { + companyBatchSize: options.companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool, + proxyPoolSize, + }, companiesProcessed: snapshot.companiesProcessed, pagination: { mode: paginated.mode, @@ -838,12 +1087,30 @@ router.get('/', async (req, res) => { router.get('/summary', async (req, res) => { try { const options = parseRequestOptions(req.query); - const snapshot = await getCoreSnapshot(options.days, options.useDbCache); + const proxyCandidates = options.disableProxy ? [] : getTruckyProxyCandidates(req.query); + const useProxyPool = !options.disableProxy; + const proxyPoolSize = useProxyPool ? 
getCachedProxies().length : 0; + const snapshot = await getCoreSnapshot({ + days: options.days, + useDbCache: options.useDbCache, + companiesSource: options.companiesSource, + companyBatchSize: options.companyBatchSize, + proxyCandidates, + useProxyPool, + }); res.set('Cache-Control', 'public, s-maxage=30, stale-while-revalidate=15'); return res.json({ fetchedAt: snapshot.fetchedAt, days: options.days, + companiesSourceRequested: options.companiesSource, + companiesSourceUsed: snapshot.companiesSourceUsed || options.companiesSource, + truckyRequestConfig: { + companyBatchSize: options.companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool, + proxyPoolSize, + }, companiesProcessed: snapshot.companiesProcessed, totalJobs: snapshot.jobs.length, errorsCount: snapshot.errors.length, @@ -857,7 +1124,17 @@ router.get('/summary', async (req, res) => { router.get('/jobs', async (req, res) => { try { const options = parseRequestOptions(req.query); - const snapshot = await getCoreSnapshot(options.days, options.useDbCache); + const proxyCandidates = options.disableProxy ? [] : getTruckyProxyCandidates(req.query); + const useProxyPool = !options.disableProxy; + const proxyPoolSize = useProxyPool ? getCachedProxies().length : 0; + const snapshot = await getCoreSnapshot({ + days: options.days, + useDbCache: options.useDbCache, + companiesSource: options.companiesSource, + companyBatchSize: options.companyBatchSize, + proxyCandidates, + useProxyPool, + }); const paginated = options.fromJobId != null ? 
paginateJobsFromId(snapshot.jobs, options.fromJobId, options.perPage) : paginateJobs(snapshot.jobs, options.page, options.perPage); @@ -866,6 +1143,14 @@ router.get('/jobs', async (req, res) => { return res.json({ fetchedAt: snapshot.fetchedAt, days: options.days, + companiesSourceRequested: options.companiesSource, + companiesSourceUsed: snapshot.companiesSourceUsed || options.companiesSource, + truckyRequestConfig: { + companyBatchSize: options.companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool, + proxyPoolSize, + }, pagination: { mode: paginated.mode, page: paginated.page, @@ -887,7 +1172,17 @@ router.get('/jobs', async (req, res) => { router.get('/geo', async (req, res) => { try { const options = parseRequestOptions(req.query); - const snapshot = await getCoreSnapshot(options.days, options.useDbCache); + const proxyCandidates = options.disableProxy ? [] : getTruckyProxyCandidates(req.query); + const useProxyPool = !options.disableProxy; + const proxyPoolSize = useProxyPool ? getCachedProxies().length : 0; + const snapshot = await getCoreSnapshot({ + days: options.days, + useDbCache: options.useDbCache, + companiesSource: options.companiesSource, + companyBatchSize: options.companyBatchSize, + proxyCandidates, + useProxyPool, + }); const paginated = options.fromJobId != null ? 
paginateJobsFromId(snapshot.jobs, options.fromJobId, options.perPage) : paginateJobs(snapshot.jobs, options.page, options.perPage); @@ -915,6 +1210,14 @@ router.get('/geo', async (req, res) => { return res.json({ fetchedAt: snapshot.fetchedAt, days: options.days, + companiesSourceRequested: options.companiesSource, + companiesSourceUsed: snapshot.companiesSourceUsed || options.companiesSource, + truckyRequestConfig: { + companyBatchSize: options.companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool, + proxyPoolSize, + }, pagination: { mode: paginated.mode, page: paginated.page, diff --git a/src/routes/v2/peruserver/trucky/top-km/index.js b/src/routes/v2/peruserver/trucky/top-km/index.js index adec21f..a8270e9 100644 --- a/src/routes/v2/peruserver/trucky/top-km/index.js +++ b/src/routes/v2/peruserver/trucky/top-km/index.js @@ -1,5 +1,10 @@ const { Router } = require('express'); const axios = require('axios'); +const { + startPeriodicProxyUpdate, + getRandomProxy, + getCachedProxies, +} = require('../../../../../utils/proxy-manager'); const router = Router(); @@ -16,6 +21,12 @@ const MAX_LIMIT = 200; const CACHE_TTL_CURRENT_MONTH_MS = 30 * 60 * 1000; // 30 minutos const CACHE_TTL_PAST_MONTH_MS = 24 * 60 * 60 * 1000; // 24 horas const COMPANIES_CACHE_TTL_MS = 6 * 60 * 60 * 1000; // 6 horas +const DEFAULT_COMPANY_BATCH_SIZE = Math.min( + 5, + Math.max(1, Number.parseInt(process.env.TRUCKY_CONCURRENCY || '3', 10) || 3) +); +const TRUCKY_MAX_RETRIES = Math.max(1, Number.parseInt(process.env.TRUCKY_MAX_RETRIES || '3', 10) || 3); +const TRUCKY_RETRY_BASE_MS = Math.max(200, Number.parseInt(process.env.TRUCKY_RETRY_BASE_MS || '700', 10) || 700); const TRUCKY_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36', Accept: 'application/json, text/plain, */*', @@ -24,6 +35,8 @@ const TRUCKY_HEADERS = { 'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8', }; 
+startPeriodicProxyUpdate(); + const monthlyCache = new Map(); const accumulatedCache = new Map(); const companiesCache = { @@ -88,6 +101,102 @@ const parseLimit = (rawLimit) => { return parsed; }; +const parseBoolean = (rawValue, defaultValue) => { + if (rawValue == null) return defaultValue; + const value = String(rawValue).trim().toLowerCase(); + + if (['1', 'true', 'yes', 'y', 'on'].includes(value)) return true; + if (['0', 'false', 'no', 'n', 'off'].includes(value)) return false; + + return defaultValue; +}; + +const parsePositiveInt = (rawValue, defaultValue, minValue, maxValue) => { + const parsed = Number(rawValue); + if (!Number.isFinite(parsed)) return defaultValue; + return Math.min(Math.max(Math.floor(parsed), minValue), maxValue); +}; + +const parseProxyForAxios = (rawProxy) => { + if (!rawProxy) return null; + + try { + const withProtocol = /^(http|https):\/\//i.test(rawProxy) + ? rawProxy + : `http://${rawProxy}`; + const url = new URL(withProtocol); + + return { + protocol: url.protocol.replace(':', ''), + host: url.hostname, + port: Number(url.port || (url.protocol === 'https:' ? 443 : 80)), + auth: url.username + ? { + username: decodeURIComponent(url.username), + password: decodeURIComponent(url.password || ''), + } + : undefined, + }; + } catch (error) { + return null; + } +}; + +const getTruckyProxyCandidates = (query) => { + const queryProxyRaw = query && query.truckyProxy ? String(query.truckyProxy).trim() : ''; + const queryProxyListRaw = query && query.truckyProxyList ? String(query.truckyProxyList).trim() : ''; + + const queryProxies = queryProxyListRaw + ? 
queryProxyListRaw.split(',').map((item) => item.trim()).filter(Boolean) + : []; + + if (queryProxyRaw) { + queryProxies.unshift(queryProxyRaw); + } + + return queryProxies.filter(Boolean); +}; + +const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + +const isRetryableTruckyStatus = (status) => [403, 429, 500, 502, 503, 504].includes(status); + +const truckyRequestWithRetry = async ({ url, params, timeout = 15000, proxyCandidates = [], useProxyPool = true }) => { + let lastError = null; + + for (let attempt = 0; attempt < TRUCKY_MAX_RETRIES; attempt += 1) { + const poolProxy = useProxyPool && proxyCandidates.length === 0 ? getRandomProxy() : null; + const proxyRaw = proxyCandidates.length > 0 + ? proxyCandidates[attempt % proxyCandidates.length] + : poolProxy; + const proxy = parseProxyForAxios(proxyRaw); + + try { + const response = await axios.get(url, { + params, + headers: TRUCKY_HEADERS, + timeout, + proxy: proxy || undefined, + }); + + return response; + } catch (error) { + lastError = error; + const status = Number(error && error.response && error.response.status); + const shouldRetry = isRetryableTruckyStatus(status); + + if (!shouldRetry || attempt === TRUCKY_MAX_RETRIES - 1) { + throw error; + } + + const waitMs = TRUCKY_RETRY_BASE_MS * (attempt + 1) + Math.floor(Math.random() * 250); + await sleep(waitMs); + } + } + + throw lastError || new Error('Error desconocido consultando Trucky'); +}; + const refreshCompaniesCache = async () => { try { let companyIds = []; @@ -177,12 +286,14 @@ const mapWithConcurrency = async (items, concurrency, asyncMapper) => { return results; }; -const getMonthlyStatsForCompany = async (companyId, month, year) => { +const getMonthlyStatsForCompany = async (companyId, month, year, requestOptions = {}) => { try { - const response = await axios.get(`${TRUCKY_BASE_URL}/${companyId}/stats/monthly`, { + const response = await truckyRequestWithRetry({ + url: `${TRUCKY_BASE_URL}/${companyId}/stats/monthly`, params: 
{ month, year }, - headers: TRUCKY_HEADERS, timeout: 15000, + proxyCandidates: requestOptions.proxyCandidates || [], + useProxyPool: requestOptions.useProxyPool !== false, }); if (response.status === 200 && response.data) { @@ -203,7 +314,7 @@ const getMonthlyStatsForCompany = async (companyId, month, year) => { } }; -const getCompanyInfo = async (companyId) => { +const getCompanyInfo = async (companyId, requestOptions = {}) => { const cacheKey = `company-${companyId}`; const cached = monthlyCache.get(cacheKey); @@ -212,9 +323,11 @@ const getCompanyInfo = async (companyId) => { } try { - const response = await axios.get(`${TRUCKY_BASE_URL}/${companyId}`, { - headers: TRUCKY_HEADERS, + const response = await truckyRequestWithRetry({ + url: `${TRUCKY_BASE_URL}/${companyId}`, timeout: 15000, + proxyCandidates: requestOptions.proxyCandidates || [], + useProxyPool: requestOptions.useProxyPool !== false, }); if (response.status === 200 && response.data) { @@ -264,7 +377,7 @@ const generateMonthRanges = (startMonth, startYear, endMonth, endYear) => { const getMonthCacheKey = (companyId, month, year) => `month-${companyId}-${year}-${month}`; -const fetchMonthWithCache = async (companyId, month, year, isCurrentMonth) => { +const fetchMonthWithCache = async (companyId, month, year, isCurrentMonth, requestOptions = {}) => { const cacheKey = getMonthCacheKey(companyId, month, year); const cached = monthlyCache.get(cacheKey); @@ -274,7 +387,7 @@ const fetchMonthWithCache = async (companyId, month, year, isCurrentMonth) => { return cached.data; } - const data = await getMonthlyStatsForCompany(companyId, month, year); + const data = await getMonthlyStatsForCompany(companyId, month, year, requestOptions); monthlyCache.set(cacheKey, { data, @@ -284,7 +397,7 @@ const fetchMonthWithCache = async (companyId, month, year, isCurrentMonth) => { return data; }; -const buildAccumulatedResponse = async ({ startMonth, startYear, limit }) => { +const buildAccumulatedResponse = async ({ 
startMonth, startYear, limit, companyBatchSize, requestOptions }) => { const currentDate = nowUtc(); const currentMonth = currentDate.getUTCMonth() + 1; const currentYear = currentDate.getUTCFullYear(); @@ -295,9 +408,9 @@ const buildAccumulatedResponse = async ({ startMonth, startYear, limit }) => { const companyAccumulatedData = await mapWithConcurrency( selectedCompanyIds, - 5, + companyBatchSize, async (companyId) => { - const companyInfo = await getCompanyInfo(companyId); + const companyInfo = await getCompanyInfo(companyId, requestOptions); let totalDistance = 0; let totalJobs = 0; let monthsProcessed = 0; @@ -305,7 +418,7 @@ const buildAccumulatedResponse = async ({ startMonth, startYear, limit }) => { for (const { month, year } of monthRanges) { const isCurrentMonth = month === currentMonth && year === currentYear; - const monthData = await fetchMonthWithCache(companyId, month, year, isCurrentMonth); + const monthData = await fetchMonthWithCache(companyId, month, year, isCurrentMonth, requestOptions); if (monthData.success) { totalDistance += monthData.distance; @@ -399,7 +512,19 @@ router.get('/', async (req, res) => { const { month: startMonth, year: startYear } = parsedStartMonthYear; const limit = parseLimit(req.query.limit); - const params = { startMonth, startYear, limit }; + const companyBatchSize = parsePositiveInt(req.query.companyBatchSize, DEFAULT_COMPANY_BATCH_SIZE, 1, 10); + const disableProxy = parseBoolean(req.query.disableProxy, false); + const proxyCandidates = disableProxy ? 
[] : getTruckyProxyCandidates(req.query); + const params = { + startMonth, + startYear, + limit, + companyBatchSize, + requestOptions: { + proxyCandidates, + useProxyPool: !disableProxy, + }, + }; const cacheKey = getAccumulatedCacheKey(params); const entry = getCacheEntry(cacheKey); @@ -417,7 +542,15 @@ router.get('/', async (req, res) => { } if (entry.payload) { - return res.json(entry.payload); + return res.json({ + ...entry.payload, + truckyRequestConfig: { + companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool: !disableProxy, + proxyPoolSize: !disableProxy ? getCachedProxies().length : 0, + }, + }); } return res.status(503).json({ diff --git a/src/routes/v2/peruserver/trucky/top-km/monthly.js b/src/routes/v2/peruserver/trucky/top-km/monthly.js index 1fe66fd..0c089f5 100644 --- a/src/routes/v2/peruserver/trucky/top-km/monthly.js +++ b/src/routes/v2/peruserver/trucky/top-km/monthly.js @@ -1,5 +1,10 @@ const { Router } = require('express'); const axios = require('axios'); +const { + startPeriodicProxyUpdate, + getRandomProxy, + getCachedProxies, +} = require('../../../../../utils/proxy-manager'); const router = Router(); @@ -15,6 +20,12 @@ const DEFAULT_LIMIT = 50; const MAX_LIMIT = 200; const CACHE_TTL_MS = 30 * 60 * 1000; const COMPANIES_CACHE_TTL_MS = 6 * 60 * 60 * 1000; // 6 horas +const DEFAULT_COMPANY_BATCH_SIZE = Math.min( + 5, + Math.max(1, Number.parseInt(process.env.TRUCKY_CONCURRENCY || '3', 10) || 3) +); +const TRUCKY_MAX_RETRIES = Math.max(1, Number.parseInt(process.env.TRUCKY_MAX_RETRIES || '3', 10) || 3); +const TRUCKY_RETRY_BASE_MS = Math.max(200, Number.parseInt(process.env.TRUCKY_RETRY_BASE_MS || '700', 10) || 700); const TRUCKY_HEADERS = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36', Accept: 'application/json, text/plain, */*', @@ -23,6 +34,8 @@ const TRUCKY_HEADERS = { 'Accept-Language': 'es-ES,es;q=0.9,en;q=0.8', }; 
// ---------------------------------------------------------------------------
// Query parsing & proxy helpers for outbound Trucky requests
// ---------------------------------------------------------------------------

/**
 * Parses a boolean-ish query value.
 * '1'/'true'/'yes'/'y'/'on' → true; '0'/'false'/'no'/'n'/'off' → false;
 * anything else (including null/undefined) → defaultValue.
 */
const parseBoolean = (rawValue, defaultValue) => {
  if (rawValue == null) return defaultValue;
  const value = String(rawValue).trim().toLowerCase();

  if (['1', 'true', 'yes', 'y', 'on'].includes(value)) return true;
  if (['0', 'false', 'no', 'n', 'off'].includes(value)) return false;

  return defaultValue;
};

/**
 * Parses a numeric query value and clamps it into [minValue, maxValue].
 * Non-numeric input falls back to defaultValue (the default is NOT clamped).
 */
const parsePositiveInt = (rawValue, defaultValue, minValue, maxValue) => {
  const parsed = Number(rawValue);
  if (!Number.isFinite(parsed)) return defaultValue;
  return Math.min(Math.max(Math.floor(parsed), minValue), maxValue);
};

/**
 * Converts a raw proxy string ("host:port" or "http(s)://user:pass@host:port")
 * into the axios `proxy` config shape: { protocol, host, port, auth? }.
 * Returns null for empty or unparsable values.
 */
const parseProxyForAxios = (rawProxy) => {
  if (!rawProxy) return null;

  try {
    // Bare "host:port" entries get an http:// scheme so URL can parse them.
    const withProtocol = /^(http|https):\/\//i.test(rawProxy)
      ? rawProxy
      : `http://${rawProxy}`;
    const url = new URL(withProtocol);

    return {
      protocol: url.protocol.replace(':', ''),
      host: url.hostname,
      port: Number(url.port || (url.protocol === 'https:' ? 443 : 80)),
      auth: url.username
        ? {
            username: decodeURIComponent(url.username),
            password: decodeURIComponent(url.password || ''),
          }
        : undefined,
    };
  } catch (error) {
    return null;
  }
};

/**
 * Collects caller-supplied proxies from the query string:
 * ?truckyProxy=<one> takes priority (front of the list), and
 * ?truckyProxyList=a,b,c appends more. Blank entries are dropped.
 */
const getTruckyProxyCandidates = (query) => {
  const queryProxyRaw = query && query.truckyProxy ? String(query.truckyProxy).trim() : '';
  const queryProxyListRaw = query && query.truckyProxyList ? String(query.truckyProxyList).trim() : '';

  const queryProxies = queryProxyListRaw
    ? queryProxyListRaw.split(',').map((item) => item.trim()).filter(Boolean)
    : [];

  if (queryProxyRaw) {
    queryProxies.unshift(queryProxyRaw);
  }

  return queryProxies.filter(Boolean);
};

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

// HTTP statuses worth retrying: bot-blocking (403), rate limiting (429) and
// transient upstream 5xx failures.
const isRetryableTruckyStatus = (status) => [403, 429, 500, 502, 503, 504].includes(status);

/**
 * True when a failed request should be retried (possibly through another
 * proxy). Covers retryable HTTP statuses AND transport-level failures with no
 * HTTP response at all (dead proxy, ECONNREFUSED/ETIMEDOUT, socket reset) —
 * the dominant failure mode of a free proxy pool.
 */
const isRetryableTruckyError = (error) => {
  const status = Number(error && error.response && error.response.status);
  if (Number.isFinite(status)) return isRetryableTruckyStatus(status);
  // No HTTP response: the proxy/connection died before Trucky answered.
  return true;
};

/**
 * GETs a Trucky URL with up to TRUCKY_MAX_RETRIES attempts, using linear
 * backoff plus jitter between attempts. Explicit proxyCandidates rotate
 * round-robin per attempt; otherwise a random proxy from the shared pool is
 * used (or a direct request when the pool is empty / useProxyPool is false).
 *
 * Fix: transport errors that never produced an HTTP response (typical of dead
 * free proxies) are now retried instead of aborting the whole request on the
 * first bad proxy — previously `Number(undefined)` → NaN made them
 * non-retryable and the rotation never happened.
 *
 * @throws the last axios error when all attempts fail or the error is not
 *         retryable (e.g. a 404 from Trucky).
 */
const truckyRequestWithRetry = async ({ url, params, timeout = 15000, proxyCandidates = [], useProxyPool = true }) => {
  let lastError = null;

  for (let attempt = 0; attempt < TRUCKY_MAX_RETRIES; attempt += 1) {
    const poolProxy = useProxyPool && proxyCandidates.length === 0 ? getRandomProxy() : null;
    const proxyRaw = proxyCandidates.length > 0
      ? proxyCandidates[attempt % proxyCandidates.length]
      : poolProxy;
    const proxy = parseProxyForAxios(proxyRaw);

    try {
      const response = await axios.get(url, {
        params,
        headers: TRUCKY_HEADERS,
        timeout,
        proxy: proxy || undefined,
      });

      return response;
    } catch (error) {
      lastError = error;

      if (!isRetryableTruckyError(error) || attempt === TRUCKY_MAX_RETRIES - 1) {
        throw error;
      }

      // Linear backoff with jitter so parallel workers don't hammer Trucky in
      // lockstep after a shared failure.
      const waitMs = TRUCKY_RETRY_BASE_MS * (attempt + 1) + Math.floor(Math.random() * 250);
      await sleep(waitMs);
    }
  }

  throw lastError || new Error('Error desconocido consultando Trucky');
};
[companyResponse, statsResponse] = await Promise.allSettled([ - axios.get(`${TRUCKY_BASE_URL}/${companyId}`, { - headers: TRUCKY_HEADERS, + truckyRequestWithRetry({ + url: `${TRUCKY_BASE_URL}/${companyId}`, timeout: 15000, + proxyCandidates: requestOptions.proxyCandidates || [], + useProxyPool: requestOptions.useProxyPool !== false, }), - axios.get(`${TRUCKY_BASE_URL}/${companyId}/stats/monthly`, { + truckyRequestWithRetry({ + url: `${TRUCKY_BASE_URL}/${companyId}/stats/monthly`, params: { month, year }, - headers: TRUCKY_HEADERS, timeout: 15000, + proxyCandidates: requestOptions.proxyCandidates || [], + useProxyPool: requestOptions.useProxyPool !== false, }), ]); @@ -229,12 +342,12 @@ const getCompanyMonthlyData = async (companyId, month, year) => { } }; -const buildMonthlyResponse = async ({ month, year, limit }) => { +const buildMonthlyResponse = async ({ month, year, limit, companyBatchSize, requestOptions }) => { const allCompanyIds = await getCompanies(); const selectedCompanyIds = allCompanyIds.slice(0, limit); - const items = await mapWithConcurrency(selectedCompanyIds, 5, async (companyId) => { - return getCompanyMonthlyData(companyId, month, year); + const items = await mapWithConcurrency(selectedCompanyIds, companyBatchSize, async (companyId) => { + return getCompanyMonthlyData(companyId, month, year, requestOptions); }); const sortedItems = [...items].sort((a, b) => { @@ -305,7 +418,20 @@ router.get('/', async (req, res) => { const { month, year } = parsedMonthYear; const limit = parseLimit(req.query.limit); - const params = { month, year, limit }; + const companyBatchSize = parsePositiveInt(req.query.companyBatchSize, DEFAULT_COMPANY_BATCH_SIZE, 1, 10); + const disableProxy = parseBoolean(req.query.disableProxy, false); + const proxyCandidates = disableProxy ? 
[] : getTruckyProxyCandidates(req.query); + + const params = { + month, + year, + limit, + companyBatchSize, + requestOptions: { + proxyCandidates, + useProxyPool: !disableProxy, + }, + }; const cacheKey = getCacheKey(params); const entry = getCacheEntry(cacheKey); @@ -323,7 +449,15 @@ router.get('/', async (req, res) => { } if (entry.payload) { - return res.json(entry.payload); + return res.json({ + ...entry.payload, + truckyRequestConfig: { + companyBatchSize, + explicitProxiesCount: proxyCandidates.length, + useProxyPool: !disableProxy, + proxyPoolSize: !disableProxy ? getCachedProxies().length : 0, + }, + }); } return res.status(503).json({ diff --git a/src/utils/bomberos-scraper.js b/src/utils/bomberos-scraper.js index a3ce43d..33e6d05 100644 --- a/src/utils/bomberos-scraper.js +++ b/src/utils/bomberos-scraper.js @@ -1,5 +1,6 @@ const fetch = require('node-fetch'); const { load } = require('cheerio'); +const { HttpsProxyAgent } = require('https-proxy-agent'); const { getRandomProxy, getCachedProxies } = require('./proxy-manager'); const { isValidCoord, normalizeLocation, geocodeApprox } = require('./geocode'); @@ -102,7 +103,10 @@ async function parseBomberos24HorasReal() { } if (proxy) { - fetchOptions.agent = new (require('http')).Agent({ httpAgent: proxy }); + const proxyUrl = /^(http|https):\/\//i.test(proxy) + ? 
/**
 * Starts the periodic proxy-list refresh: one immediate fetch, then a fetch
 * every PROXY_UPDATE_INTERVAL milliseconds (default: 1 hour).
 *
 * Idempotent — repeated calls (one per route module that imports this
 * manager) reuse the existing interval instead of stacking timers. The
 * interval is unref'd so it never keeps the Node process alive on its own.
 */
function startPeriodicProxyUpdate() {
  if (proxyUpdateIntervalId) {
    return;
  }

  // Explicit radix, and reject non-positive values: Node clamps a negative or
  // zero setInterval delay to 1 ms, which would spam the proxy API in a loop.
  const parsed = Number.parseInt(process.env.PROXY_UPDATE_INTERVAL || '', 10);
  const intervalMs = Number.isFinite(parsed) && parsed > 0 ? parsed : 3600000; // 1 hora por defecto
  console.log(`🕐 Actualizaciones de proxies cada ${intervalMs / 60000} minutos`);

  // Primera actualización inmediata
  fetchProxiesFromAPI();

  // Luego periódicamente
  proxyUpdateIntervalId = setInterval(() => {
    fetchProxiesFromAPI();
  }, intervalMs);

  if (typeof proxyUpdateIntervalId.unref === 'function') {
    proxyUpdateIntervalId.unref();
  }
}