Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions scripts/regenerate_spec_outputs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""
Script to regenerate expected outputs for all specification tests.

This should be run when the processor output format changes to update
all test expectations to match the current behavior.
"""

import sys
from pathlib import Path

# Add src to path
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from knowledgebase_processor.processor.processor import Processor
from knowledgebase_processor.utils.document_registry import DocumentRegistry
from knowledgebase_processor.utils.id_generator import EntityIdGenerator
from knowledgebase_processor.extractor.markdown import MarkdownExtractor
from knowledgebase_processor.extractor.frontmatter import FrontmatterExtractor
from knowledgebase_processor.extractor.heading_section import HeadingSectionExtractor
from knowledgebase_processor.extractor.link_reference import LinkReferenceExtractor
from knowledgebase_processor.extractor.code_quote import CodeQuoteExtractor
from knowledgebase_processor.extractor.todo_item import TodoItemExtractor
from knowledgebase_processor.extractor.tags import TagExtractor
from knowledgebase_processor.extractor.list_table import ListTableExtractor


def setup_processor():
    """Build a Processor with every extractor used by the spec tests.

    The processor is created with a new DocumentRegistry, an
    EntityIdGenerator whose base URL is ``http://example.org/kb/``, and no
    config. One instance of each extractor class is then registered, in
    the order listed below.

    Returns:
        The configured Processor instance.
    """
    processor = Processor(
        document_registry=DocumentRegistry(),
        id_generator=EntityIdGenerator(base_url="http://example.org/kb/"),
        config=None,
    )

    # Registration order is kept explicit; each class is instantiated
    # immediately before it is registered.
    extractor_types = (
        MarkdownExtractor,
        FrontmatterExtractor,
        HeadingSectionExtractor,
        LinkReferenceExtractor,
        CodeQuoteExtractor,
        TodoItemExtractor,
        TagExtractor,
        ListTableExtractor,
    )
    for extractor_type in extractor_types:
        processor.register_extractor(extractor_type())

    return processor


def regenerate_all_outputs():
    """Rewrite ``expected_output.ttl`` for every spec test case.

    Looks for ``specs/test_cases`` two directory levels above this file.
    Each sub-directory there is treated as one test case: its ``input.md``
    is processed into a graph by a freshly built processor, and the graph's
    Turtle serialization overwrites ``expected_output.ttl`` in the same
    directory. Progress and a final summary are printed to stdout.

    Returns:
        0 when every test case was regenerated; 1 when the test-case
        directory is missing, contains no sub-directories, or at least one
        case was skipped (no ``input.md``) or raised during processing.
    """
    cases_root = Path(__file__).parent.parent / "specs" / "test_cases"

    if not cases_root.exists():
        print(f"Error: Test cases directory not found: {cases_root}")
        return 1

    # Sorted so cases are processed in a stable, name-based order.
    case_dirs = sorted(entry for entry in cases_root.iterdir() if entry.is_dir())

    if not case_dirs:
        print(f"Error: No test case directories found in {cases_root}")
        return 1

    print(f"Found {len(case_dirs)} test cases to regenerate")
    print()

    passed = 0
    failed = 0

    for case_dir in case_dirs:
        case_name = case_dir.name
        source_md = case_dir / "input.md"
        target_ttl = case_dir / "expected_output.ttl"

        # A case without input counts as a failure for the exit status.
        if not source_md.exists():
            print(f"⚠️ Skipping {case_name}: input.md not found")
            failed += 1
            continue

        try:
            # A new processor per case, so nothing carries over between cases.
            case_processor = setup_processor()
            markdown_text = source_md.read_text(encoding='utf-8')
            graph = case_processor.process_content_to_graph(
                markdown_text,
                document_id=f"test_cases/{case_name}",
            )
            turtle_text = graph.serialize(format='turtle')
            target_ttl.write_text(turtle_text, encoding='utf-8')

            print(f"✅ {case_name}")
            passed += 1
        except Exception as exc:
            # Keep going: one broken case must not stop the remaining ones.
            print(f"❌ {case_name}: {exc!s}")
            failed += 1

    print()
    print(f"Summary: {passed} succeeded, {failed} failed")

    return 1 if failed else 0


if __name__ == "__main__":
    # Use the regeneration result as the process exit status
    # (0 = all cases regenerated, 1 = something failed or was skipped).
    exit_status = regenerate_all_outputs()
    sys.exit(exit_status)
4 changes: 3 additions & 1 deletion specs/test_cases/code_01_empty_document/expected_output.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,7 @@
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_01_empty_document> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_01_empty_document> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_01_empty_document> ;
schema:dateCreated "2025-11-05T14:56:11.529068+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.529074+00:00"^^xsd:dateTime .

32 changes: 31 additions & 1 deletion specs/test_cases/code_02_no_language/expected_output.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,42 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/kb/documents/test_cases/code_02_no_language/code/unknown-pos-0-2> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "unknown code block"^^xsd:string ;
kb:lineCount 1 ;
kb:positionEnd 2 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_02_no_language> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_02_no_language/code/unknown-pos-0-2> ;
schema:dateCreated "2025-11-05T14:56:11.536445+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.536445+00:00"^^xsd:dateTime ;
schema:text "print('Hello, world!')"^^xsd:string .

<http://example.org/kb/documents/test_cases/code_02_no_language/code/unknown-pos-0-3> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "unknown code block"^^xsd:string ;
kb:lineCount 1 ;
kb:positionEnd 3 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_02_no_language> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_02_no_language/code/unknown-pos-0-3> ;
schema:dateCreated "2025-11-05T14:56:11.536400+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.536402+00:00"^^xsd:dateTime ;
schema:programmingLanguage ""^^xsd:string ;
schema:text """print('Hello, world!')
"""^^xsd:string .

<http://example.org/kb/vocab#/test_cases/code_02_no_language> a kb:Document,
kb:Entity,
schema:CreativeWork ;
rdfs:label "Temporary Document"^^xsd:string ;
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_02_no_language> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_02_no_language> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_02_no_language> ;
schema:dateCreated "2025-11-05T14:56:11.535768+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.535770+00:00"^^xsd:dateTime .

33 changes: 32 additions & 1 deletion specs/test_cases/code_03_with_language/expected_output.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,43 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/kb/documents/test_cases/code_03_with_language/code/python-pos-0-2> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "python code block"^^xsd:string ;
kb:lineCount 1 ;
kb:positionEnd 2 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_03_with_language> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_03_with_language/code/python-pos-0-2> ;
schema:dateCreated "2025-11-05T14:56:11.540675+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.540676+00:00"^^xsd:dateTime ;
schema:programmingLanguage "python"^^xsd:string ;
schema:text "print('Hello, world!')"^^xsd:string .

<http://example.org/kb/documents/test_cases/code_03_with_language/code/python-pos-0-3> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "python code block"^^xsd:string ;
kb:lineCount 1 ;
kb:positionEnd 3 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_03_with_language> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_03_with_language/code/python-pos-0-3> ;
schema:dateCreated "2025-11-05T14:56:11.540647+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.540648+00:00"^^xsd:dateTime ;
schema:programmingLanguage "python"^^xsd:string ;
schema:text """print('Hello, world!')
"""^^xsd:string .

<http://example.org/kb/vocab#/test_cases/code_03_with_language> a kb:Document,
kb:Entity,
schema:CreativeWork ;
rdfs:label "Temporary Document"^^xsd:string ;
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_03_with_language> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_03_with_language> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_03_with_language> ;
schema:dateCreated "2025-11-05T14:56:11.540293+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.540294+00:00"^^xsd:dateTime .

107 changes: 106 additions & 1 deletion specs/test_cases/code_04_multiple_blocks/expected_output.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,117 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/javascript-pos-11-15> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "javascript code block"^^xsd:string ;
kb:lineCount 3 ;
kb:positionEnd 15 ;
kb:positionStart 11 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/javascript-pos-11-15> ;
schema:dateCreated "2025-11-05T14:56:11.545508+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545508+00:00"^^xsd:dateTime ;
schema:programmingLanguage "javascript"^^xsd:string ;
schema:text """function hello() {
console.log('Hello, world!');
}"""^^xsd:string .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/javascript-pos-11-16> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "javascript code block"^^xsd:string ;
kb:lineCount 3 ;
kb:positionEnd 16 ;
kb:positionStart 11 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/javascript-pos-11-16> ;
schema:dateCreated "2025-11-05T14:56:11.545423+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545423+00:00"^^xsd:dateTime ;
schema:programmingLanguage "javascript"^^xsd:string ;
schema:text """function hello() {
console.log('Hello, world!');
}
"""^^xsd:string .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/python-pos-4-7> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "python code block"^^xsd:string ;
kb:lineCount 2 ;
kb:positionEnd 7 ;
kb:positionStart 4 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/python-pos-4-7> ;
schema:dateCreated "2025-11-05T14:56:11.545482+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545483+00:00"^^xsd:dateTime ;
schema:programmingLanguage "python"^^xsd:string ;
schema:text """def hello():
print('Hello, world!')"""^^xsd:string .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/python-pos-4-8> a kb:CodeBlock,
kb:Entity,
schema:SoftwareSourceCode ;
rdfs:label "python code block"^^xsd:string ;
kb:lineCount 2 ;
kb:positionEnd 8 ;
kb:positionStart 4 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/code/python-pos-4-8> ;
schema:dateCreated "2025-11-05T14:56:11.545399+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545400+00:00"^^xsd:dateTime ;
schema:programmingLanguage "python"^^xsd:string ;
schema:text """def hello():
print('Hello, world!')
"""^^xsd:string .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/section/pos-0-16> a kb:Entity,
kb:Section,
schema:Article ;
rdfs:label "Section 0-16"^^xsd:string ;
kb:hasHeading <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/heading/h1-code-examples> ;
kb:positionEnd 16 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/section/pos-0-16> ;
schema:dateCreated "2025-11-05T14:56:11.545371+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545372+00:00"^^xsd:dateTime .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/section/pos-1-15> a kb:Entity,
kb:Section,
schema:Article ;
rdfs:label "Section 1-15"^^xsd:string ;
kb:hasHeading <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/heading/h1-code-examples> ;
kb:positionEnd 15 ;
kb:positionStart 1 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/section/pos-1-15> ;
schema:dateCreated "2025-11-05T14:56:11.545461+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545462+00:00"^^xsd:dateTime .

<http://example.org/kb/documents/test_cases/code_04_multiple_blocks/heading/h1-code-examples> a kb:Entity,
kb:Heading,
schema:Article ;
rdfs:label "Code Examples"^^xsd:string ;
kb:headingLevel 1 ;
kb:positionEnd 0 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_04_multiple_blocks/heading/h1-code-examples> ;
schema:dateCreated "2025-11-05T14:56:11.545336+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.545441+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.545337+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.545441+00:00"^^xsd:dateTime ;
schema:headline "Code Examples"^^xsd:string .

<http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> a kb:Document,
kb:Entity,
schema:CreativeWork ;
rdfs:label "Temporary Document"^^xsd:string ;
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_04_multiple_blocks> ;
schema:dateCreated "2025-11-05T14:56:11.544454+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.544455+00:00"^^xsd:dateTime .

19 changes: 18 additions & 1 deletion specs/test_cases/code_05_simple_blockquote/expected_output.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,29 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/kb/documents/test_cases/code_05_simple_blockquote/blockquote/this-is-a-blockquote> a kb:Blockquote,
kb:Entity,
schema:Quotation ;
rdfs:label "This is a blockquote."^^xsd:string ;
kb:nestingLevel 1 ;
kb:positionEnd 0 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_05_simple_blockquote> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_05_simple_blockquote/blockquote/this-is-a-blockquote> ;
schema:dateCreated "2025-11-05T14:56:11.557517+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.557547+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.557518+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.557547+00:00"^^xsd:dateTime ;
schema:text "This is a blockquote."^^xsd:string .

<http://example.org/kb/vocab#/test_cases/code_05_simple_blockquote> a kb:Document,
kb:Entity,
schema:CreativeWork ;
rdfs:label "Temporary Document"^^xsd:string ;
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_05_simple_blockquote> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_05_simple_blockquote> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_05_simple_blockquote> ;
schema:dateCreated "2025-11-05T14:56:11.557057+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.557058+00:00"^^xsd:dateTime .

Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,34 @@
@prefix schema: <https://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.org/kb/documents/test_cases/code_06_multiline_blockquote/blockquote/this-is-a-blockquote-with-multiple-lines-spanning> a kb:Blockquote,
kb:Entity,
schema:Quotation ;
rdfs:label """This is a blockquote
with multiple lines
spanning """^^xsd:string ;
kb:nestingLevel 1 ;
kb:positionEnd 0,
2 ;
kb:positionStart 0 ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_06_multiline_blockquote> ;
rdfs:seeAlso <http://example.org/kb/documents/test_cases/code_06_multiline_blockquote/blockquote/this-is-a-blockquote-with-multiple-lines-spanning> ;
schema:dateCreated "2025-11-05T14:56:11.561652+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.561678+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.561653+00:00"^^xsd:dateTime,
"2025-11-05T14:56:11.561678+00:00"^^xsd:dateTime ;
schema:text """This is a blockquote
with multiple lines
spanning three lines."""^^xsd:string .

<http://example.org/kb/vocab#/test_cases/code_06_multiline_blockquote> a kb:Document,
kb:Entity,
schema:CreativeWork ;
rdfs:label "Temporary Document"^^xsd:string ;
kb:originalPath "temp_document.md"^^xsd:string ;
kb:pathWithoutExtension "temp_document"^^xsd:string ;
kb:sourceDocument <http://example.org/kb/vocab#/test_cases/code_06_multiline_blockquote> ;
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_06_multiline_blockquote> .
rdfs:seeAlso <http://example.org/kb/vocab#/test_cases/code_06_multiline_blockquote> ;
schema:dateCreated "2025-11-05T14:56:11.561148+00:00"^^xsd:dateTime ;
schema:dateModified "2025-11-05T14:56:11.561149+00:00"^^xsd:dateTime .

Loading