From bb6f529f337e052ef9efc5654df49a6702be03a3 Mon Sep 17 00:00:00 2001 From: Musicminion Date: Sun, 25 Jan 2026 23:56:46 +0800 Subject: [PATCH 1/7] feat: add proxy learn feature. - Todo: add cron job to update documents - Changes: add `path.learnPagesDir` to `web/config` - Changes: add `learn` module - Changes: develop env settings --- develop/dev.env | 4 + services/web/config/settings.defaults.js | 2 + .../web/modules/learn/app/src/LearnProxy.mjs | 67 ++++++++++ .../web/modules/learn/app/src/LearnRouter.mjs | 16 +++ .../modules/learn/app/src/sanitizeOptions.mjs | 118 ++++++++++++++++++ .../web/modules/learn/app/views/learn.pug | 27 ++++ services/web/modules/learn/index.mjs | 22 ++++ 7 files changed, 256 insertions(+) create mode 100644 services/web/modules/learn/app/src/LearnProxy.mjs create mode 100644 services/web/modules/learn/app/src/LearnRouter.mjs create mode 100644 services/web/modules/learn/app/src/sanitizeOptions.mjs create mode 100644 services/web/modules/learn/app/views/learn.pug create mode 100644 services/web/modules/learn/index.mjs diff --git a/develop/dev.env b/develop/dev.env index cf8d1991c1..47c7006eef 100644 --- a/develop/dev.env +++ b/develop/dev.env @@ -22,3 +22,7 @@ WEBPACK_HOST=webpack WEB_API_PASSWORD=overleaf WEB_API_USER=overleaf WEB_HOST=web +ADMIN_PRIVILEGE_AVAILABLE=true +V1_HISTORY_URL=http://history-v1:3100/api +OT_JWT_AUTH_KEY="very secret key" +OVERLEAF_PROXY_LEARN=true \ No newline at end of file diff --git a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js index fb7c1e3585..f74ffb4755 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -744,6 +744,7 @@ module.exports = { // them to disk here). 
dumpFolder: Path.resolve(__dirname, '../data/dumpFolder'), uploadFolder: Path.resolve(__dirname, '../data/uploads'), + learnPagesDir: Path.resolve(__dirname, '../data/learnPages'), }, // Automatic Snapshots @@ -1062,6 +1063,7 @@ module.exports = { moduleImportSequence: [ 'history-v1', 'launchpad', + 'learn', 'server-ce-scripts', 'user-activate', 'sandboxed-compiles', diff --git a/services/web/modules/learn/app/src/LearnProxy.mjs b/services/web/modules/learn/app/src/LearnProxy.mjs new file mode 100644 index 0000000000..f3a158eb9f --- /dev/null +++ b/services/web/modules/learn/app/src/LearnProxy.mjs @@ -0,0 +1,67 @@ +import sanitizeHtml from 'sanitize-html' +import Settings from '@overleaf/settings' +import { sanitizeOptions } from './sanitizeOptions.mjs' +import fs from 'node:fs' +import logger from '@overleaf/logger' +import Path from 'node:path' +import { expressify } from '@overleaf/promise-utils' + +async function learnPage(req, res) { + logger.debug({}, 'Learn proxy requested '+ req.path) + let reqPath = req.path + // Trim leading '/', only show the path after '/' + if (reqPath.startsWith('/')) { + reqPath = reqPath.slice(1) + } else { + res.status(400).send('Bad Request') + return + } + let learnPath = reqPath + + if (learnPath === '') { + logger.debug({}, 'Learn proxy requested root path, redirecting to Main/Page') + learnPath = 'Main Page' + } + + // Encode the path for file system usage + learnPath = encodeURIComponent(learnPath.replace(/_/g, ' ')) + logger.debug({}, `Learn proxy requested path: ${learnPath}`) + + // Contents.json Should be sidebarHtml + const contentsFilePath = Path.resolve(Settings.path.learnPagesDir, `Contents.json`) + // If Contents.json does not exist, return 500 + if (!fs.existsSync(contentsFilePath)) { + logger.error({}, `Learn proxy Contents.json not found at path: ${contentsFilePath}`) + res.status(500).send('Internal Server Error') + return + } + const raw = await fs.promises.readFile(contentsFilePath, 'utf-8') + const json = 
JSON.parse(raw) + const sidebarHtml = json.text['*'] + + + let pageFilePath = Path.resolve("/overleaf/services/web/data/learnPages/", `${learnPath}.json`) + // If the page does not exist, fallback to "Learn LaTeX in 30 minutes" + if (!fs.existsSync(pageFilePath)) { + learnPath = 'Learn%20LaTeX%20in%2030%20minutes' + pageFilePath = Path.resolve("/overleaf/services/web/data/learnPages/", `${learnPath}.json`) + } + + const pageRaw = await fs.promises.readFile(pageFilePath, 'utf-8') + const pageJson = JSON.parse(pageRaw) + const pageTitle = pageJson.title + const pageHtml = pageJson.text['*'] + + + res.render(Path.resolve(import.meta.dirname, '../views/learn'), { + sidebarHtml: sanitizeHtml(sidebarHtml, sanitizeOptions), + pageTitle: pageTitle, + pageHtml: sanitizeHtml(pageHtml, sanitizeOptions), + }) +} + +const LearnProxyController = { + learnPage: expressify(learnPage), +} + +export default LearnProxyController diff --git a/services/web/modules/learn/app/src/LearnRouter.mjs b/services/web/modules/learn/app/src/LearnRouter.mjs new file mode 100644 index 0000000000..f890123673 --- /dev/null +++ b/services/web/modules/learn/app/src/LearnRouter.mjs @@ -0,0 +1,16 @@ +import logger from '@overleaf/logger' +import Settings from '@overleaf/settings' +import LearnProxyController from './LearnProxy.mjs' + + +export default { + apply(webRouter) { + if (!Settings.proxyLearn) { + logger.debug({}, 'Learn proxy disabled via Settings.proxyLearn') + return + } + + webRouter.get('/learn', LearnProxyController.learnPage) + webRouter.use('/learn/latex', LearnProxyController.learnPage) + }, +} \ No newline at end of file diff --git a/services/web/modules/learn/app/src/sanitizeOptions.mjs b/services/web/modules/learn/app/src/sanitizeOptions.mjs new file mode 100644 index 0000000000..4dd14363ca --- /dev/null +++ b/services/web/modules/learn/app/src/sanitizeOptions.mjs @@ -0,0 +1,118 @@ +import settings from "@overleaf/settings"; + +const sanitizeOptions = { + allowedTags: [ + 'a', + 
'abbr', + 'address', + 'article', + 'aside', + 'b', + 'blockquote', + 'br', + 'caption', + 'code', + 'col', + 'colgroup', + 'dd', + 'del', + 'details', + 'div', + 'dl', + 'dt', + 'em', + 'figure', + 'figcaption', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'i', + 'img', + 'ins', + 'kbd', + 'li', + 'main', + 'ol', + 'p', + 'pre', + 's', + 'section', + 'small', + 'span', + 'strong', + 'sub', + 'summary', + 'sup', + 'table', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'time', + 'tr', + 'u', + 'ul', + 'video', + 'source', + 'iframe', + ], + allowedAttributes: { + '*': [ + 'aria-describedby', + 'aria-hidden', + 'aria-label', + 'class', + 'data-*', + 'dir', + 'id', + 'lang', + 'role', + 'style', + 'title', + 'translate', + ], + a: ['href', 'name', 'target', 'rel'], + img: ['alt', 'decoding', 'height', 'loading', 'src', 'srcset', 'width'], + iframe: [ + 'allow', + 'allowfullscreen', + 'frameborder', + 'height', + 'loading', + 'referrerpolicy', + 'src', + 'title', + 'width', + ], + td: ['colspan', 'rowspan', 'headers'], + th: ['abbr', 'colspan', 'rowspan', 'headers', 'scope'], + time: ['datetime'], + video: ['controls', 'height', 'poster', 'src', 'width'], + source: ['src', 'type'], + }, + allowedSchemes: ['http', 'https', 'mailto'], + allowProtocolRelative: false, + transformTags: { + 'a': (tagName, attribs) => { + if (attribs.href && attribs.href.startsWith('/learn/')) { + attribs.href = attribs.href.replace(/^\/learn\//, '/learn/latex/'); + } + return { tagName, attribs }; + }, + 'img': (tagName, attribs) => { + if (attribs.src && attribs.src.startsWith('/learn-scripts/images/')) { + attribs.src = settings.apis.wiki.url + attribs.src; + return { tagName, attribs }; + } + // Keep other images unchanged + return { tagName, attribs }; + } + } +} + +export { sanitizeOptions } \ No newline at end of file diff --git a/services/web/modules/learn/app/views/learn.pug b/services/web/modules/learn/app/views/learn.pug new file mode 100644 index 
0000000000..91afe7f3ad --- /dev/null +++ b/services/web/modules/learn/app/views/learn.pug @@ -0,0 +1,27 @@ +extends ../../../../app/views/layout-marketing + +block vars + - metadata = metadata || {} + + +block content + #main-content.content.content-alt + .container.wiki + .row.template-page-header + .col-md-8 + + .row + .col-12.col-md-3.order-md-1.contents.card + .card-body + .mw-parser-output + != sidebarHtml + .col-12.col-md-9.order-md-5.page + .card.row-spaced + .card-body + .page-header + h1 + != pageTitle + div(data-ol-mathjax="") + .mw-parser-output + != pageHtml + diff --git a/services/web/modules/learn/index.mjs b/services/web/modules/learn/index.mjs new file mode 100644 index 0000000000..0715a2d859 --- /dev/null +++ b/services/web/modules/learn/index.mjs @@ -0,0 +1,22 @@ +import LearnRouter from './app/src/LearnRouter.mjs' +import Settings from '@overleaf/settings' +import logger from '@overleaf/logger' +/** @import { WebModule } from "../../types/web-module" */ + +/** @type {WebModule} */ +let LearnModule = {} + +if (process.env.OVERLEAF_PROXY_LEARN === 'true') { + logger.debug({}, 'Learn proxy enabled via OVERLEAF_PROXY_LEARN=true') + // Set learnPagesDir + Settings.proxyLearn = true + // Add header_extras with Documentation link + Settings.nav.header_extras.push({text: "Documentation", url: "/learn", class: "nav-link"}) + + // Export LearnModule + LearnModule = { + router: LearnRouter, + } +} + +export default LearnModule \ No newline at end of file From 5ae31a66566f87cf02cde9f31b94080ed9a625bb Mon Sep 17 00:00:00 2001 From: Musicminion Date: Mon, 26 Jan 2026 13:50:33 +0800 Subject: [PATCH 2/7] fix: add cache update - rename `learnPagesDir` to `learnPagesFolder` - add cache detect for learn pages --- services/web/config/settings.defaults.js | 2 +- .../web/modules/learn/app/src/LearnProxy.mjs | 61 ++++++++++++++++--- services/web/modules/learn/index.mjs | 13 +++- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git 
a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js index f74ffb4755..7235ac584e 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -744,7 +744,7 @@ module.exports = { // them to disk here). dumpFolder: Path.resolve(__dirname, '../data/dumpFolder'), uploadFolder: Path.resolve(__dirname, '../data/uploads'), - learnPagesDir: Path.resolve(__dirname, '../data/learnPages'), + learnPagesFolder: Path.resolve(__dirname, '../data/learnPages'), }, // Automatic Snapshots diff --git a/services/web/modules/learn/app/src/LearnProxy.mjs b/services/web/modules/learn/app/src/LearnProxy.mjs index f3a158eb9f..8e8bdc7640 100644 --- a/services/web/modules/learn/app/src/LearnProxy.mjs +++ b/services/web/modules/learn/app/src/LearnProxy.mjs @@ -5,9 +5,49 @@ import fs from 'node:fs' import logger from '@overleaf/logger' import Path from 'node:path' import { expressify } from '@overleaf/promise-utils' +import scrape from '../../../../scripts/learn/checkSanitize/scrape.mjs' +const { scrapeAndCachePage } = scrape + + +// Check if the filePath are older than maxCacheAge +// Based on Settings.apis.wiki.maxCacheAgeer +// If older, re-fetch and update the cache +async function checkFileCache(learnPagesFolder, pageName) { + const path = Path.join(learnPagesFolder, encodeURIComponent(pageName) + '.json') + // Check if file exists + let stat = null + let now = Date.now() + let mtime = 0 + try { + stat = await fs.promises.stat(path) + mtime = stat.mtime.getTime() + } catch (e) { + logger.error({ err: e }, `error stating cached page file: ${path}`) + } + + + // If the cache is older than maxCacheAge, refresh it + if (stat === null || now - mtime > Settings.apis.wiki.maxCacheAge) { + logger.debug({ + now: now, + mtime: mtime, + maxCacheAge: Settings.apis.wiki.maxCacheAge + }, `out of date cache detected for file: ${path}`) + + const BASE_URL = Settings.apis.wiki.url + + try { + await fs.promises.unlink(path) + 
logger.debug({}, `deleted cached page file to force re-fetching: ${path}`) + } catch (e) { + logger.error({ err: e }, `error deleting cached page file: ${path}`) + } + await scrapeAndCachePage(BASE_URL, pageName) + } + +} async function learnPage(req, res) { - logger.debug({}, 'Learn proxy requested '+ req.path) let reqPath = req.path // Trim leading '/', only show the path after '/' if (reqPath.startsWith('/')) { @@ -24,40 +64,43 @@ async function learnPage(req, res) { } // Encode the path for file system usage - learnPath = encodeURIComponent(learnPath.replace(/_/g, ' ')) + learnPath = encodeURIComponent(decodeURIComponent(learnPath.replace(/_/g, ' '))) logger.debug({}, `Learn proxy requested path: ${learnPath}`) // Contents.json Should be sidebarHtml - const contentsFilePath = Path.resolve(Settings.path.learnPagesDir, `Contents.json`) + let contentsFilePath = Path.resolve(Settings.path.learnPagesFolder, `Contents.json`) + // If Contents.json does not exist, return 500 if (!fs.existsSync(contentsFilePath)) { - logger.error({}, `Learn proxy Contents.json not found at path: ${contentsFilePath}`) - res.status(500).send('Internal Server Error') + await checkFileCache(Settings.path.learnPagesFolder, 'Contents') return } + + await checkFileCache(Settings.path.learnPagesFolder, 'Contents') const raw = await fs.promises.readFile(contentsFilePath, 'utf-8') const json = JSON.parse(raw) const sidebarHtml = json.text['*'] - - let pageFilePath = Path.resolve("/overleaf/services/web/data/learnPages/", `${learnPath}.json`) + let pageFilePath = Path.resolve(Settings.path.learnPagesFolder, `${learnPath}.json`) // If the page does not exist, fallback to "Learn LaTeX in 30 minutes" if (!fs.existsSync(pageFilePath)) { learnPath = 'Learn%20LaTeX%20in%2030%20minutes' - pageFilePath = Path.resolve("/overleaf/services/web/data/learnPages/", `${learnPath}.json`) + pageFilePath = Path.resolve(Settings.path.learnPagesFolder, `${learnPath}.json`) } + await 
checkFileCache(Settings.path.learnPagesFolder, decodeURIComponent(learnPath)) const pageRaw = await fs.promises.readFile(pageFilePath, 'utf-8') const pageJson = JSON.parse(pageRaw) const pageTitle = pageJson.title const pageHtml = pageJson.text['*'] - res.render(Path.resolve(import.meta.dirname, '../views/learn'), { sidebarHtml: sanitizeHtml(sidebarHtml, sanitizeOptions), pageTitle: pageTitle, pageHtml: sanitizeHtml(pageHtml, sanitizeOptions), }) + + } const LearnProxyController = { diff --git a/services/web/modules/learn/index.mjs b/services/web/modules/learn/index.mjs index 0715a2d859..079b10cc85 100644 --- a/services/web/modules/learn/index.mjs +++ b/services/web/modules/learn/index.mjs @@ -1,6 +1,8 @@ import LearnRouter from './app/src/LearnRouter.mjs' import Settings from '@overleaf/settings' import logger from '@overleaf/logger' +import scrape from '../../scripts/learn/checkSanitize/scrape.mjs' +const { getAllPagesAndCache } = scrape /** @import { WebModule } from "../../types/web-module" */ /** @type {WebModule} */ @@ -8,7 +10,16 @@ let LearnModule = {} if (process.env.OVERLEAF_PROXY_LEARN === 'true') { logger.debug({}, 'Learn proxy enabled via OVERLEAF_PROXY_LEARN=true') - // Set learnPagesDir + + // Get all page cache while starting up + // Then no need to write script for pull all pages cache + // Only ensure the pages are there, not latest content, we will update later. 
+ const BASE_URL = Settings.apis.wiki.url + getAllPagesAndCache(BASE_URL).catch((err) => { + logger.error({ err: err }, 'error caching learn pages on startup') + }) + + // Set learnPagesFolder Settings.proxyLearn = true // Add header_extras with Documentation link Settings.nav.header_extras.push({text: "Documentation", url: "/learn", class: "nav-link"}) From 1e517c1d4572b3f4e94aaa040c27148cdb7c409e Mon Sep 17 00:00:00 2001 From: Musicminion Date: Mon, 26 Jan 2026 14:25:59 +0800 Subject: [PATCH 3/7] fix: learn page in production - add mkdir learnPages to init script --- server-ce/init_scripts/100_make_overleaf_data_dirs.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh index 2723ae7a61..a0f47f8745 100755 --- a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh +++ b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh @@ -33,3 +33,6 @@ chown www-data:www-data /var/lib/overleaf/tmp/uploads mkdir -p /var/lib/overleaf/tmp/dumpFolder chown www-data:www-data /var/lib/overleaf/tmp/dumpFolder + +mkdir -p /overleaf/services/web/data/learnPages +chown www-data:www-data /overleaf/services/web/data/learnPages \ No newline at end of file From 829671ad49b8b20da53227176ae9a876af9be178 Mon Sep 17 00:00:00 2001 From: Musicminion Date: Mon, 26 Jan 2026 14:36:16 +0800 Subject: [PATCH 4/7] fix: bugs in init learn modules - add pull all pages in init stage --- services/web/modules/learn/index.mjs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/services/web/modules/learn/index.mjs b/services/web/modules/learn/index.mjs index 079b10cc85..9b9efd6e1d 100644 --- a/services/web/modules/learn/index.mjs +++ b/services/web/modules/learn/index.mjs @@ -2,7 +2,7 @@ import LearnRouter from './app/src/LearnRouter.mjs' import Settings from '@overleaf/settings' import logger from '@overleaf/logger' import scrape from 
'../../scripts/learn/checkSanitize/scrape.mjs' -const { getAllPagesAndCache } = scrape +const { getAllPagesAndCache, scrapeAndCachePage } = scrape /** @import { WebModule } from "../../types/web-module" */ /** @type {WebModule} */ @@ -15,9 +15,10 @@ if (process.env.OVERLEAF_PROXY_LEARN === 'true') { // Then no need to write script for pull all pages cache // Only ensure the pages are there, not latest content, we will update later. const BASE_URL = Settings.apis.wiki.url - getAllPagesAndCache(BASE_URL).catch((err) => { - logger.error({ err: err }, 'error caching learn pages on startup') - }) + const pages = await getAllPagesAndCache(BASE_URL) + for (const page of pages) { + await scrapeAndCachePage(BASE_URL, page) + } // Set learnPagesFolder Settings.proxyLearn = true From 0ce6bf52c0be3185d394c9b4fada6e885bce9080 Mon Sep 17 00:00:00 2001 From: Musicminion Date: Mon, 26 Jan 2026 14:52:01 +0800 Subject: [PATCH 5/7] fix: document router with public access --- services/web/modules/learn/app/src/LearnRouter.mjs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/web/modules/learn/app/src/LearnRouter.mjs b/services/web/modules/learn/app/src/LearnRouter.mjs index f890123673..e2eb63151b 100644 --- a/services/web/modules/learn/app/src/LearnRouter.mjs +++ b/services/web/modules/learn/app/src/LearnRouter.mjs @@ -1,7 +1,7 @@ import logger from '@overleaf/logger' import Settings from '@overleaf/settings' import LearnProxyController from './LearnProxy.mjs' - +import AuthenticationController from '../../../../app/src/Features/Authentication/AuthenticationController.mjs' export default { apply(webRouter) { @@ -11,6 +11,8 @@ export default { } webRouter.get('/learn', LearnProxyController.learnPage) + AuthenticationController.addEndpointToLoginWhitelist('/learn/*') webRouter.use('/learn/latex', LearnProxyController.learnPage) + AuthenticationController.addEndpointToLoginWhitelist('/learn/latex/*') }, } \ No newline at end of file From 
17558289f620984eedbca69f9c824b4d34c11206 Mon Sep 17 00:00:00 2001 From: Musicminion Date: Mon, 26 Jan 2026 16:27:32 +0800 Subject: [PATCH 6/7] refactor: refactor learn script - add scrape script with CACHE_IN position - add ce script to use DATA path - drop old `web/data/learnPages` for wiki storage --- server-ce/config/settings.js | 2 + .../100_make_overleaf_data_dirs.sh | 6 +- .../web/modules/learn/app/src/LearnProxy.mjs | 2 +- .../web/modules/learn/app/src/LearnRouter.mjs | 2 - .../learn/app/src/checkSanitizeOptions.mjs | 118 +++++++++++++++++ services/web/modules/learn/app/src/scrape.mjs | 125 ++++++++++++++++++ services/web/modules/learn/index.mjs | 6 +- 7 files changed, 252 insertions(+), 9 deletions(-) create mode 100644 services/web/modules/learn/app/src/checkSanitizeOptions.mjs create mode 100644 services/web/modules/learn/app/src/scrape.mjs diff --git a/server-ce/config/settings.js b/server-ce/config/settings.js index 640f1da0ce..5e88650ce3 100644 --- a/server-ce/config/settings.js +++ b/server-ce/config/settings.js @@ -174,6 +174,8 @@ const settings = { clsiCacheDir: Path.join(DATA_DIR, 'cache'), // Where to write the output files to disk after running LaTeX outputDir: Path.join(DATA_DIR, 'output'), + // Where to cache learn pages + learnPagesFolder: Path.join(DATA_DIR, 'learnPages'), }, // Server Config diff --git a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh index a0f47f8745..fe9dd49f91 100755 --- a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh +++ b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh @@ -19,6 +19,9 @@ chown www-data:www-data /var/lib/overleaf/data/template_files mkdir -p /var/lib/overleaf/data/history chown www-data:www-data /var/lib/overleaf/data/history +mkdir -p /var/lib/overleaf/data/learnPages +chown www-data:www-data /var/lib/overleaf/data/learnPages + mkdir -p /var/lib/overleaf/tmp/projectHistories chown www-data:www-data
/var/lib/overleaf/tmp/projectHistories @@ -33,6 +36,3 @@ chown www-data:www-data /var/lib/overleaf/tmp/uploads mkdir -p /var/lib/overleaf/tmp/dumpFolder chown www-data:www-data /var/lib/overleaf/tmp/dumpFolder - -mkdir -p /overleaf/services/web/data/learnPages -chown www-data:www-data /overleaf/services/web/data/learnPages \ No newline at end of file diff --git a/services/web/modules/learn/app/src/LearnProxy.mjs b/services/web/modules/learn/app/src/LearnProxy.mjs index 8e8bdc7640..3affdc5b40 100644 --- a/services/web/modules/learn/app/src/LearnProxy.mjs +++ b/services/web/modules/learn/app/src/LearnProxy.mjs @@ -5,7 +5,7 @@ import fs from 'node:fs' import logger from '@overleaf/logger' import Path from 'node:path' import { expressify } from '@overleaf/promise-utils' -import scrape from '../../../../scripts/learn/checkSanitize/scrape.mjs' +import scrape from './scrape.mjs' const { scrapeAndCachePage } = scrape diff --git a/services/web/modules/learn/app/src/LearnRouter.mjs b/services/web/modules/learn/app/src/LearnRouter.mjs index e2eb63151b..83cc770051 100644 --- a/services/web/modules/learn/app/src/LearnRouter.mjs +++ b/services/web/modules/learn/app/src/LearnRouter.mjs @@ -11,8 +11,6 @@ export default { } webRouter.get('/learn', LearnProxyController.learnPage) - AuthenticationController.addEndpointToLoginWhitelist('/learn/*') webRouter.use('/learn/latex', LearnProxyController.learnPage) - AuthenticationController.addEndpointToLoginWhitelist('/learn/latex/*') }, } \ No newline at end of file diff --git a/services/web/modules/learn/app/src/checkSanitizeOptions.mjs b/services/web/modules/learn/app/src/checkSanitizeOptions.mjs new file mode 100644 index 0000000000..3ae9410434 --- /dev/null +++ b/services/web/modules/learn/app/src/checkSanitizeOptions.mjs @@ -0,0 +1,118 @@ +import crypto from 'node:crypto' +import fs from 'node:fs' +import Path from 'node:path' +import cheerio from 'cheerio' +// checkSanitizeOptions is only used in dev env +// eslint-disable-next-line 
import/no-extraneous-dependencies +import * as prettier from 'prettier' +import sanitizeHtml from 'sanitize-html' +import { sanitizeOptions } from './sanitizeOptions.mjs' +import { fileURLToPath } from 'node:url' + +const __dirname = Path.dirname(fileURLToPath(import.meta.url)) +const EXTRACT_STYLE = process.env.EXTRACT_STYLES === 'true' +const OMIT_STYLE = process.env.OMIT_STYLE !== 'false' +const DUMP_CSS_IN = Path.join( + Path.dirname(Path.dirname(Path.dirname(__dirname))), + 'data', + 'dumpFolder' +) + +function hash(blob) { + return crypto.createHash('sha1').update(blob).digest('hex') +} + +function normalize(blob, title) { + // styles are dropped in web and kept in wiki pages for previewing there. + blob = blob.replace(/