From a1c13d907e170bc49232d4bffba32461f48a80e6 Mon Sep 17 00:00:00 2001 From: Lukas Mager <7467162+lkmgr@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:51:48 +0100 Subject: [PATCH] fix: handle errors during `getLinks` parsing --- src/crawler.ts | 18 +++++++++++++++--- src/links.ts | 12 ++++++++---- test/test.index.ts | 20 ++++++++++++++++++++ test/test.links.ts | 22 ++++++++++++++++++++++ 4 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 test/test.links.ts diff --git a/src/crawler.ts b/src/crawler.ts index 7df26db6..8f82c501 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -186,10 +186,22 @@ export class LinkChecker extends EventEmitter { const state = status >= 200 && status < 300 ? LinkState.OK : LinkState.BROKEN; - this.emitResult(opts, state, status, failures); - // Recurse if body is HTML and crawling is enabled - await this.maybeRecurse(opts, response); + if (state === LinkState.BROKEN) { + this.emitResult(opts, state, status, failures); + return; + } + + try { + // Recurse if body is HTML and crawling is enabled + await this.maybeRecurse(opts, response); + this.emitResult(opts, state, status, failures); + } catch (error) { + // Report as a broken link when parsing body failed + this.emitResult(opts, LinkState.BROKEN, 0, [ + { cause: (error as Error).cause, message: (error as Error).message }, + ]); + } } // Perform fetch, handle retry on 429, collect failures diff --git a/src/links.ts b/src/links.ts index bd58938e..0de14599 100644 --- a/src/links.ts +++ b/src/links.ts @@ -183,10 +183,14 @@ export async function getLinks( }, }); await new Promise((resolve, reject) => { - Stream.Readable.fromWeb(source as import('stream/web').ReadableStream) - .pipe(parser) - .on('finish', resolve) - .on('error', reject); + const rs = Stream.Readable.fromWeb( + source as import('stream/web').ReadableStream, + ); + + // Reject on Readable error + rs.on('error', reject); + + rs.pipe(parser).on('finish', resolve).on('error', reject); }); return links; } 
diff --git a/test/test.index.ts b/test/test.index.ts index be7619dc..57061679 100644 --- a/test/test.index.ts +++ b/test/test.index.ts @@ -8,6 +8,7 @@ import { LinkState, check, } from '../src/index.js'; +import * as linksMethods from '../src/links.ts'; import { DEFAULT_OPTIONS } from '../src/options.ts'; import { invertedPromise } from './utils.ts'; @@ -629,6 +630,25 @@ describe('linkinator', () => { scope.done(); }); + it('should treat link as broken when getLinks throws', async () => { + const parseErr = new Error('Parsing failure'); + const spy = vi.spyOn(linksMethods, 'getLinks').mockRejectedValue(parseErr); + + const checker = new LinkChecker(); + const results = await checker.check({ + path: 'test/fixtures/basic', + }); + + assert.ok(!results.passed); + assert.strictEqual(results.links[0].state, LinkState.BROKEN); + assert.strictEqual( + (results.links[0]?.failureDetails?.[0] as Error).message, + 'Parsing failure', + ); + + spy.mockRestore(); + }); + describe('element metadata', () => { it('should provide text in results', async () => { const scope = nock('http://example.invalid').head('/').reply(404); diff --git a/test/test.links.ts b/test/test.links.ts new file mode 100644 index 00000000..4fbcfa29 --- /dev/null +++ b/test/test.links.ts @@ -0,0 +1,22 @@ +import { describe, expect, it } from 'vitest'; +import { getLinks } from '../src/links.js'; + +describe('getLinks', () => { + it('should reject when the HTML stream emits an error', async () => { + const body = new ReadableStream({ + start(controller) { + setTimeout(() => controller.error(new Error('StreamError')), 0); + }, + }); + + const response = { + body, + headers: new Headers({ 'content-type': 'text/html' }), + } as unknown as Response; + + // Expect getLinks to reject with our error. + await expect(getLinks(response, 'http://example.invalid')).rejects.toThrow( + 'StreamError', + ); + }); +});