diff --git a/develop/dev.env b/develop/dev.env index cf8d1991c1..47c7006eef 100644 --- a/develop/dev.env +++ b/develop/dev.env @@ -22,3 +22,7 @@ WEBPACK_HOST=webpack WEB_API_PASSWORD=overleaf WEB_API_USER=overleaf WEB_HOST=web +ADMIN_PRIVILEGE_AVAILABLE=true +V1_HISTORY_URL=http://history-v1:3100/api +OT_JWT_AUTH_KEY="very secret key" +OVERLEAF_PROXY_LEARN=true \ No newline at end of file diff --git a/server-ce/config/settings.js b/server-ce/config/settings.js index 640f1da0ce..5e88650ce3 100644 --- a/server-ce/config/settings.js +++ b/server-ce/config/settings.js @@ -174,6 +174,8 @@ const settings = { clsiCacheDir: Path.join(DATA_DIR, 'cache'), // Where to write the output files to disk after running LaTeX outputDir: Path.join(DATA_DIR, 'output'), + // Where to cache learn pages + learnPagesFolder: Path.join(DATA_DIR, 'learnPages'), }, // Server Config diff --git a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh index 2723ae7a61..fe9dd49f91 100755 --- a/server-ce/init_scripts/100_make_overleaf_data_dirs.sh +++ b/server-ce/init_scripts/100_make_overleaf_data_dirs.sh @@ -19,6 +19,9 @@ chown www-data:www-data /var/lib/overleaf/data/template_files mkdir -p /var/lib/overleaf/data/history chown www-data:www-data /var/lib/overleaf/data/history +mkdir -p /var/lib/overleaf/data/learnPages +chown www-data:www-data /var/lib/overleaf/data/learnPages + mkdir -p /var/lib/overleaf/tmp/projectHistories chown www-data:www-data /var/lib/overleaf/tmp/projectHistories diff --git a/services/web/config/settings.defaults.js b/services/web/config/settings.defaults.js index fb7c1e3585..7235ac584e 100644 --- a/services/web/config/settings.defaults.js +++ b/services/web/config/settings.defaults.js @@ -744,6 +744,7 @@ module.exports = { // them to disk here). dumpFolder: Path.resolve(__dirname, '../data/dumpFolder'), uploadFolder: Path.resolve(__dirname, '../data/uploads'), + learnPagesFolder: Path.resolve(__dirname, '../data/learnPages'), }, // Automatic Snapshots @@ -1062,6 +1063,7 @@ module.exports = { moduleImportSequence: [ 'history-v1', 'launchpad', + 'learn', 'server-ce-scripts', 'user-activate', 'sandboxed-compiles', diff --git a/services/web/modules/learn/app/src/LearnProxy.mjs b/services/web/modules/learn/app/src/LearnProxy.mjs new file mode 100644 index 0000000000..3affdc5b40 --- /dev/null +++ b/services/web/modules/learn/app/src/LearnProxy.mjs @@ -0,0 +1,110 @@ +import sanitizeHtml from 'sanitize-html' +import Settings from '@overleaf/settings' +import { sanitizeOptions } from './sanitizeOptions.mjs' +import fs from 'node:fs' +import logger from '@overleaf/logger' +import Path from 'node:path' +import { expressify } from '@overleaf/promise-utils' +import scrape from './scrape.mjs' +const { scrapeAndCachePage } = scrape + + +// Check if the filePath are older than maxCacheAge +// Based on Settings.apis.wiki.maxCacheAgeer +// If older, re-fetch and update the cache +async function checkFileCache(learnPagesFolder, pageName) { + const path = Path.join(learnPagesFolder, encodeURIComponent(pageName) + '.json') + // Check if file exists + let stat = null + let now = Date.now() + let mtime = 0 + try { + stat = await fs.promises.stat(path) + mtime = stat.mtime.getTime() + } catch (e) { + logger.error({ err: e }, `error stating cached page file: ${path}`) + } + + + // If the cache is older than maxCacheAge, refresh it + if (stat === null || now - mtime > Settings.apis.wiki.maxCacheAge) { + logger.debug({ + now: now, + mtime: mtime, + maxCacheAge: Settings.apis.wiki.maxCacheAge + }, `out of date cache detected for file: ${path}`) + + const BASE_URL = Settings.apis.wiki.url + + try { + await fs.promises.unlink(path) + logger.debug({}, `deleted cached page file to force re-fetching: ${path}`) + } catch (e) { + logger.error({ err: e }, `error deleting cached page file: ${path}`) + } + await scrapeAndCachePage(BASE_URL, pageName) + } + +} + +async function learnPage(req, res) { + let reqPath = req.path + // Trim leading '/', only show the path after '/' + if (reqPath.startsWith('/')) { + reqPath = reqPath.slice(1) + } else { + res.status(400).send('Bad Request') + return + } + let learnPath = reqPath + + if (learnPath === '') { + logger.debug({}, 'Learn proxy requested root path, redirecting to Main/Page') + learnPath = 'Main Page' + } + + // Encode the path for file system usage + learnPath = encodeURIComponent(decodeURIComponent(learnPath.replace(/_/g, ' '))) + logger.debug({}, `Learn proxy requested path: ${learnPath}`) + + // Contents.json Should be sidebarHtml + let contentsFilePath = Path.resolve(Settings.path.learnPagesFolder, `Contents.json`) + + // If Contents.json does not exist, return 500 + if (!fs.existsSync(contentsFilePath)) { + await checkFileCache(Settings.path.learnPagesFolder, 'Contents') + return + } + + await checkFileCache(Settings.path.learnPagesFolder, 'Contents') + const raw = await fs.promises.readFile(contentsFilePath, 'utf-8') + const json = JSON.parse(raw) + const sidebarHtml = json.text['*'] + + let pageFilePath = Path.resolve(Settings.path.learnPagesFolder, `${learnPath}.json`) + // If the page does not exist, fallback to "Learn LaTeX in 30 minutes" + if (!fs.existsSync(pageFilePath)) { + learnPath = 'Learn%20LaTeX%20in%2030%20minutes' + pageFilePath = Path.resolve(Settings.path.learnPagesFolder, `${learnPath}.json`) + } + + await checkFileCache(Settings.path.learnPagesFolder, decodeURIComponent(learnPath)) + const pageRaw = await fs.promises.readFile(pageFilePath, 'utf-8') + const pageJson = JSON.parse(pageRaw) + const pageTitle = pageJson.title + const pageHtml = pageJson.text['*'] + + res.render(Path.resolve(import.meta.dirname, '../views/learn'), { + sidebarHtml: sanitizeHtml(sidebarHtml, sanitizeOptions), + pageTitle: pageTitle, + pageHtml: sanitizeHtml(pageHtml, sanitizeOptions), + }) + + +} + +const LearnProxyController = { + learnPage: expressify(learnPage), +} + +export default LearnProxyController diff --git a/services/web/modules/learn/app/src/LearnRouter.mjs b/services/web/modules/learn/app/src/LearnRouter.mjs new file mode 100644 index 0000000000..83cc770051 --- /dev/null +++ b/services/web/modules/learn/app/src/LearnRouter.mjs @@ -0,0 +1,16 @@ +import logger from '@overleaf/logger' +import Settings from '@overleaf/settings' +import LearnProxyController from './LearnProxy.mjs' +import AuthenticationController from '../../../../app/src/Features/Authentication/AuthenticationController.mjs' + +export default { + apply(webRouter) { + if (!Settings.proxyLearn) { + logger.debug({}, 'Learn proxy disabled via Settings.proxyLearn') + return + } + + webRouter.get('/learn', LearnProxyController.learnPage) + webRouter.use('/learn/latex', LearnProxyController.learnPage) + }, +} \ No newline at end of file diff --git a/services/web/modules/learn/app/src/checkSanitizeOptions.mjs b/services/web/modules/learn/app/src/checkSanitizeOptions.mjs new file mode 100644 index 0000000000..3ae9410434 --- /dev/null +++ b/services/web/modules/learn/app/src/checkSanitizeOptions.mjs @@ -0,0 +1,118 @@ +import crypto from 'node:crypto' +import fs from 'node:fs' +import Path from 'node:path' +import cheerio from 'cheerio' +// checkSanitizeOptions is only used in dev env +// eslint-disable-next-line import/no-extraneous-dependencies +import * as prettier from 'prettier' +import sanitizeHtml from 'sanitize-html' +import { sanitizeOptions } from './sanitizeOptions.mjs' +import { fileURLToPath } from 'node:url' + +const __dirname = Path.dirname(fileURLToPath(import.meta.url)) +const EXTRACT_STYLE = process.env.EXTRACT_STYLES === 'true' +const OMIT_STYLE = process.env.OMIT_STYLE !== 'false' +const DUMP_CSS_IN = Path.join( + Path.dirname(Path.dirname(Path.dirname(__dirname))), + 'data', + 'dumpFolder' +) + +function hash(blob) { + return crypto.createHash('sha1').update(blob).digest('hex') +} + +function normalize(blob, title) { + // styles are dropped in web and kept in wiki pages for previewing there. + blob = blob.replace(/