diff --git a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
index 99c521b..03765fd 100644
--- a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
+++ b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
@@ -15,6 +15,8 @@ public enum JobStatus {
     /** Job is waiting to be executed. */
     TO_BE_EXECUTED(false),
+    /** Job is currently being executed. Partial results may be available in DB. */
+    RUNNING(false),
     /** The domain was not resolvable. An empty result was written to DB. */
     UNRESOLVABLE(true),
     /** An uncaught exception occurred while resolving the host. */
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
index 11831cc..8fb9b03 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
@@ -9,10 +9,11 @@ package de.rub.nds.crawler.core;
 
 import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
 import de.rub.nds.crawler.data.ScanTarget;
+import de.rub.nds.crawler.persistence.IPersistenceProvider;
 import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
 import de.rub.nds.scanner.core.execution.NamedThreadFactory;
-import java.util.concurrent.Future;
 import java.util.concurrent.LinkedBlockingDeque;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@@ -41,6 +42,9 @@ public abstract class BulkScanWorker<T extends ScanConfig> {
     /** The scan configuration for this worker */
     protected final T scanConfig;
 
+    /** The persistence provider for writing partial results */
+    private IPersistenceProvider persistenceProvider;
+
     /**
      * Calls the inner scan function and may handle cleanup. This is needed to wrap the scanner into
      * a future object such that we can handle timeouts properly.
      */
@@ -74,31 +78,53 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThreads) {
      * Handles a scan target by submitting it to the executor. If init was not called, it will
      * initialize itself. In this case it will also clean up itself if all jobs are done.
      *
-     * @param scanTarget The target to scan.
-     * @return A future that resolves to the scan result once the scan is done.
+     * <p>Returns a {@link ScheduledScan} that represents the entire scan lifecycle, allowing
+     * callers to:
+     *
+     * <ul>
+     *   <li>check if the scan is complete via {@link ScheduledScan#isComplete()}
+     *   <li>get the current (partial) result via {@link ScheduledScan#getCurrentResult()}
+     *   <li>wait for the final result via {@link ScheduledScan#getFinalResult()}
+     * </ul>
+     *
+     * @param jobDescription The job description for this scan.
+     * @return A ScheduledScan representing the scan lifecycle
      */
-    public Future<Document> handle(ScanTarget scanTarget) {
+    public ScheduledScan handle(ScanJobDescription jobDescription) {
         // if we initialized ourself, we also clean up ourself
         shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
         activeJobs.incrementAndGet();
-        return timeoutExecutor.submit(
+
+        ScheduledScan scheduledScan = new ScheduledScan();
+
+        timeoutExecutor.submit(
                 () -> {
-                    Document result = scan(scanTarget);
-                    if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
-                        cleanup();
+                    try {
+                        Document result = scan(jobDescription, scheduledScan);
+                        scheduledScan.complete(result);
+                        if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
+                            cleanup();
+                        }
+                    } catch (Exception e) {
+                        scheduledScan.completeExceptionally(e);
+                        activeJobs.decrementAndGet();
+                        throw e;
                     }
-                    return result;
                 });
+
+        return scheduledScan;
     }
 
     /**
      * Scans a target and returns the result as a Document. This is the core scanning functionality
      * that must be implemented by subclasses.
      *
-     * @param scanTarget The target to scan
+     * @param jobDescription The job description containing target and metadata
+     * @param scheduledScan The scheduled scan for reporting progress via {@link
+     *     ScheduledScan#updateResult}
      * @return The scan result as a Document
      */
-    public abstract Document scan(ScanTarget scanTarget);
+    public abstract Document scan(ScanJobDescription jobDescription, ScheduledScan scheduledScan);
@@ -161,4 +187,26 @@ public final boolean cleanup() {
      * specific resources.
      */
     protected abstract void cleanupInternal();
+
+    /**
+     * Sets the persistence provider for writing partial results.
+     *
+     * @param persistenceProvider The persistence provider to use
+     */
+    public void setPersistenceProvider(IPersistenceProvider persistenceProvider) {
+        this.persistenceProvider = persistenceProvider;
+    }
+
+    /**
+     * Persists a partial scan result. This method can be called by subclasses during scanning to
+     * save intermediate results.
+     *
+     * @param jobDescription The job description for the scan
+     * @param partialResult The partial result document to persist
+     */
+    protected void persistPartialResult(ScanJobDescription jobDescription, Document partialResult) {
+        if (persistenceProvider != null) {
+            persistenceProvider.upsertPartialResult(jobDescription, partialResult);
+        }
+    }
 }
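To make the new `scan(ScanJobDescription, ScheduledScan)` contract concrete, here is a minimal sketch of a subclass that reports progress probe by probe. The probe names and `runProbe` are invented stand-ins; only the `BulkScanWorker`, `ScheduledScan`, and `persistPartialResult` calls come from this patch, and the class is assumed to live in `de.rub.nds.crawler.core`.

```java
import de.rub.nds.crawler.data.ScanConfig;
import de.rub.nds.crawler.data.ScanJobDescription;
import java.util.List;
import org.bson.Document;

// Hypothetical worker: publishes a snapshot after each probe, both in-process
// (updateResult) and durably (persistPartialResult, upserted by job ID).
public class ExampleBulkScanWorker extends BulkScanWorker<ScanConfig> {

    protected ExampleBulkScanWorker(
            String bulkScanId, ScanConfig scanConfig, int parallelScanThreads) {
        super(bulkScanId, scanConfig, parallelScanThreads);
    }

    @Override
    public Document scan(ScanJobDescription jobDescription, ScheduledScan scheduledScan) {
        Document result = new Document("target", jobDescription.getScanTarget().getIp());
        for (String probe : List.of("protocolVersions", "cipherSuites", "certificates")) {
            result.put(probe, runProbe(probe)); // stand-in for real probe logic
            scheduledScan.updateResult(result); // in-process snapshot for observers
            persistPartialResult(jobDescription, result); // durable snapshot in the DB
        }
        return result; // handle() completes the ScheduledScan with this document
    }

    private Document runProbe(String probe) {
        return new Document("probe", probe).append("done", true);
    }

    @Override
    protected void initInternal() {}

    @Override
    protected void cleanupInternal() {}
}
```

Note that `updateResult` only publishes a reference; an implementation that keeps mutating the same `Document` races with concurrent readers, so publishing a defensive copy per update is the safer variant.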
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
index 3e78782..415e7e4 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
@@ -14,13 +14,12 @@
 import de.rub.nds.crawler.data.BulkScanInfo;
 import de.rub.nds.crawler.data.ScanConfig;
 import de.rub.nds.crawler.data.ScanJobDescription;
+import de.rub.nds.crawler.persistence.IPersistenceProvider;
 import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.lang3.exception.UncheckedException;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.bson.Document;
 
 /**
  * Each ScanJob has its own BulkScanWorker. This class manages the BulkScanWorkers and ensures that
@@ -58,21 +57,27 @@ public static BulkScanWorkerManager getInstance() {
     /**
      * Static convenience method to handle a scan job. See also {@link #handle(ScanJobDescription,
-     * int, int)}.
+     * int, int, IPersistenceProvider)}.
      *
      * @param scanJobDescription The scan job to handle
      * @param parallelConnectionThreads The number of parallel connection threads to use (used to
      *     create worker if it does not exist)
      * @param parallelScanThreads The number of parallel scan threads to use (used to create worker
      *     if it does not exist)
-     * @return A future that returns the scan result when the target is scanned is done
+     * @param persistenceProvider The persistence provider for writing partial results
+     * @return A ScheduledScan representing the scan lifecycle
      */
-    public static Future<Document> handleStatic(
+    public static ScheduledScan handleStatic(
             ScanJobDescription scanJobDescription,
             int parallelConnectionThreads,
-            int parallelScanThreads) {
+            int parallelScanThreads,
+            IPersistenceProvider persistenceProvider) {
         BulkScanWorkerManager manager = getInstance();
-        return manager.handle(scanJobDescription, parallelConnectionThreads, parallelScanThreads);
+        return manager.handle(
+                scanJobDescription,
+                parallelConnectionThreads,
+                parallelScanThreads,
+                persistenceProvider);
     }
 
     private final Cache<String, BulkScanWorker<?>> bulkScanWorkers;
@@ -135,12 +140,14 @@ public BulkScanWorker<?> getBulkScanWorker(
      *     create worker if it does not exist)
      * @param parallelScanThreads The number of parallel scan threads to use (used to create worker
      *     if it does not exist)
-     * @return A future that returns the scan result when the target is scanned is done
+     * @param persistenceProvider The persistence provider for writing partial results
+     * @return A ScheduledScan representing the scan lifecycle
      */
-    public Future<Document> handle(
+    public ScheduledScan handle(
             ScanJobDescription scanJobDescription,
             int parallelConnectionThreads,
-            int parallelScanThreads) {
+            int parallelScanThreads,
+            IPersistenceProvider persistenceProvider) {
         BulkScanInfo bulkScanInfo = scanJobDescription.getBulkScanInfo();
         BulkScanWorker<?> worker =
                 getBulkScanWorker(
@@ -148,6 +155,7 @@ public Future<Document> handle(
                         bulkScanInfo.getScanConfig(),
                         parallelConnectionThreads,
                         parallelScanThreads);
-        return worker.handle(scanJobDescription.getScanTarget());
+        worker.setPersistenceProvider(persistenceProvider);
+        return worker.handle(scanJobDescription);
     }
 }
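Call-side wiring, condensed from what Worker does further below; a sketch assuming the caller sits in `de.rub.nds.crawler.core`, with arbitrary example thread counts. As the `bulkScanWorkers` cache above suggests, the manager appears to reuse one worker per bulk-scan ID, injecting the persistence provider before delegating the job.

```java
import de.rub.nds.crawler.data.ScanJobDescription;
import de.rub.nds.crawler.persistence.IPersistenceProvider;

final class ScanSubmission {
    // Hand a job to the (cached) worker for its bulk scan and get back the
    // lifecycle handle instead of a bare Future.
    static ScheduledScan submit(ScanJobDescription job, IPersistenceProvider provider) {
        return BulkScanWorkerManager.handleStatic(
                job,
                /* parallelConnectionThreads= */ 20,
                /* parallelScanThreads= */ 4,
                provider);
    }
}
```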

diff --git a/src/main/java/de/rub/nds/crawler/core/ScheduledScan.java b/src/main/java/de/rub/nds/crawler/core/ScheduledScan.java
new file mode 100644
index 0000000..b67cd84
--- /dev/null
+++ b/src/main/java/de/rub/nds/crawler/core/ScheduledScan.java
@@ -0,0 +1,88 @@
+/*
+ * TLS-Crawler - A TLS scanning tool to perform large scale scans with the TLS-Scanner
+ *
+ * Copyright 2018-2023 Ruhr University Bochum, Paderborn University, and Hackmanit GmbH
+ *
+ * Licensed under Apache License, Version 2.0
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ */
+package de.rub.nds.crawler.core;
+
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Future;
+import org.bson.Document;
+
+/**
+ * Represents a scheduled scan that tracks progress and provides both partial and final results.
+ *
+ * <p>This class provides a clean abstraction for the scan lifecycle:
+ *
+ * <ul>
+ *   <li>Check if the scan is complete via {@link #isComplete()}
+ *   <li>Get the current result (partial or final) via {@link #getCurrentResult()}
+ *   <li>Wait for the final result via {@link #getFinalResult()}
+ * </ul>
+ */
+public class ScheduledScan {
+
+    private volatile Document currentResult;
+    private final CompletableFuture<Document> finalResult = new CompletableFuture<>();
+
+    /**
+     * Check if the scan has completed.
+     *
+     * @return true if the scan is complete, false if still in progress
+     */
+    public boolean isComplete() {
+        return finalResult.isDone();
+    }
+
+    /**
+     * Get the current result document. If the scan is still in progress, this returns the latest
+     * partial result. If the scan is complete, this returns the final result.
+     *
+     * @return The current result document, or null if no result is available yet
+     */
+    public Document getCurrentResult() {
+        return currentResult;
+    }
+
+    /**
+     * Get a Future that will resolve to the final result when the scan completes.
+     *
+     * @return A Future containing the final scan result
+     */
+    public Future<Document> getFinalResult() {
+        return finalResult;
+    }
+
+    /**
+     * Update the current result. This is called by the scan worker when new partial results are
+     * available.
+     *
+     * @param partialResult The updated partial result document
+     */
+    public void updateResult(Document partialResult) {
+        this.currentResult = partialResult;
+    }
+
+    /**
+     * Mark the scan as complete with the final result. This will complete the Future and notify
+     * any waiting consumers.
+     *
+     * @param result The final scan result
+     */
+    void complete(Document result) {
+        this.currentResult = result;
+        this.finalResult.complete(result);
+    }
+
+    /**
+     * Mark the scan as failed with an exception.
+     *
+     * @param exception The exception that caused the failure
+     */
+    void completeExceptionally(Throwable exception) {
+        this.finalResult.completeExceptionally(exception);
+    }
+}
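A sketch of the consumer side of this API: poll the snapshot while the scan runs, then switch to the final document. It assumes the caller lives in the same package or imports `ScheduledScan`; the polling interval is an arbitrary example value.

```java
import java.util.concurrent.ExecutionException;
import org.bson.Document;

final class ScanProgressExample {
    static Document awaitWithProgress(ScheduledScan scan) throws InterruptedException {
        while (!scan.isComplete()) {
            Document snapshot = scan.getCurrentResult(); // null until the first updateResult(...)
            if (snapshot != null) {
                System.out.println("progress: " + snapshot.keySet());
            }
            Thread.sleep(1_000); // example polling interval
        }
        try {
            return scan.getFinalResult().get(); // scan is done, returns immediately
        } catch (ExecutionException e) {
            // completeExceptionally(...) path: the scan failed
            throw new IllegalStateException("scan failed", e.getCause());
        }
    }
}
```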
diff --git a/src/main/java/de/rub/nds/crawler/core/Worker.java b/src/main/java/de/rub/nds/crawler/core/Worker.java
index 1608e10..8509be4 100644
--- a/src/main/java/de/rub/nds/crawler/core/Worker.java
+++ b/src/main/java/de/rub/nds/crawler/core/Worker.java
@@ -70,10 +70,11 @@ public void start() {
     }
 
     private ScanResult waitForScanResult(
-            Future<Document> resultFuture, ScanJobDescription scanJobDescription)
+            ScheduledScan scheduledScan, ScanJobDescription scanJobDescription)
             throws ExecutionException, InterruptedException, TimeoutException {
         Document resultDocument;
         JobStatus jobStatus;
+        Future<Document> resultFuture = scheduledScan.getFinalResult();
         try {
             resultDocument = resultFuture.get(scanTimeout, TimeUnit.MILLISECONDS);
             jobStatus = resultDocument != null ? JobStatus.SUCCESS : JobStatus.EMPTY;
@@ -92,15 +93,19 @@ private ScanResult waitForScanResult(
 
     private void handleScanJob(ScanJobDescription scanJobDescription) {
         LOGGER.info("Received scan job for {}", scanJobDescription.getScanTarget());
-        Future<Document> resultFuture =
+        ScheduledScan scheduledScan =
                 BulkScanWorkerManager.handleStatic(
-                        scanJobDescription, parallelConnectionThreads, parallelScanThreads);
+                        scanJobDescription,
+                        parallelConnectionThreads,
+                        parallelScanThreads,
+                        persistenceProvider);
+
         workerExecutor.submit(
                 () -> {
                     ScanResult scanResult = null;
                     boolean persist = true;
                     try {
-                        scanResult = waitForScanResult(resultFuture, scanJobDescription);
+                        scanResult = waitForScanResult(scheduledScan, scanJobDescription);
                     } catch (InterruptedException e) {
                         LOGGER.error("Worker was interrupted - not persisting anything", e);
                         scanJobDescription.setStatus(JobStatus.INTERNAL_ERROR);
@@ -118,7 +123,7 @@ private void handleScanJob(ScanJobDescription scanJobDescription) {
                                 "Scan of '{}' did not finish in time and did not cancel gracefully",
                                 scanJobDescription.getScanTarget());
                         scanJobDescription.setStatus(JobStatus.CANCELLED);
-                        resultFuture.cancel(true);
+                        scheduledScan.getFinalResult().cancel(true);
                         scanResult = ScanResult.fromException(scanJobDescription, e);
                     } catch (Exception e) {
                         LOGGER.error(
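The timeout branch above now leans on CompletableFuture semantics, with one subtlety: unlike the executor Future it replaces, `CompletableFuture.cancel(true)` does not interrupt the thread running `scan()`, since CompletableFuture ignores the `mayInterruptIfRunning` flag. What a caller observes after such a cancellation is sketched here:

```java
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.bson.Document;

final class TimeoutFallbackExample {
    // After cancel(true), pending and future get() calls throw
    // CancellationException, while the last published partial snapshot stays
    // readable in-process (and in the DB, if it was upserted).
    static Document resultOrLastSnapshot(ScheduledScan scan, long timeoutMillis)
            throws InterruptedException, ExecutionException {
        try {
            return scan.getFinalResult().get(timeoutMillis, TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            scan.getFinalResult().cancel(true); // does not interrupt the scan thread
            return scan.getCurrentResult(); // best effort: last partial result or null
        } catch (CancellationException e) {
            return scan.getCurrentResult();
        }
    }
}
```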
diff --git a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
index 30d2ffb..aec842e 100644
--- a/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
+++ b/src/main/java/de/rub/nds/crawler/persistence/IPersistenceProvider.java
@@ -73,4 +73,13 @@ public interface IPersistenceProvider {
      */
     ScanResult getScanResultByScanJobDescriptionId(
             String dbName, String collectionName, String scanJobDescriptionId);
+
+    /**
+     * Upsert a partial scan result into the database. Uses the job ID as the document ID, so
+     * subsequent calls with the same job will overwrite the previous partial result.
+     *
+     * @param job The scan job description (provides ID, database name, collection name).
+     * @param partialResult The partial result document to upsert.
+     */
+    void upsertPartialResult(ScanJobDescription job, org.bson.Document partialResult);
 }
diff --git a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
index a1278c1..6eab7b6 100644
--- a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
+++ b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
@@ -25,6 +25,7 @@
 import com.mongodb.client.MongoClients;
 import com.mongodb.client.MongoDatabase;
 import com.mongodb.client.model.Indexes;
+import com.mongodb.client.model.ReplaceOptions;
 import com.mongodb.lang.NonNull;
 import de.rub.nds.crawler.config.delegate.MongoDbDelegate;
 import de.rub.nds.crawler.constant.JobStatus;
@@ -392,4 +393,32 @@ public ScanResult getScanResultByScanJobDescriptionId(
                     e);
         }
     }
+
+    @Override
+    public void upsertPartialResult(ScanJobDescription job, org.bson.Document partialResult) {
+        String dbName = job.getDbName();
+        String collectionName = job.getCollectionName();
+        String jobId = job.getId().toString();
+
+        LOGGER.debug(
+                "Upserting partial result for job {} into collection: {}.{}",
+                jobId,
+                dbName,
+                collectionName);
+
+        try {
+            // Get raw MongoDB collection (not JacksonMongoCollection) for Document operations
+            var collection = databaseCache.getUnchecked(dbName).getCollection(collectionName);
+
+            // Upsert: replace if exists, insert if not
+            collection.replaceOne(
+                    new org.bson.Document("_id", jobId),
+                    partialResult,
+                    new ReplaceOptions().upsert(true));
+
+            LOGGER.debug("Upserted partial result for job {}", jobId);
+        } catch (Exception e) {
+            LOGGER.warn("Failed to upsert partial result for job {}: {}", jobId, e.getMessage());
+        }
+    }
 }
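Because the partial document is keyed by the job ID and `replaceOne` swaps the whole document atomically, an out-of-process reader always sees a complete snapshot of the last write. A read-back sketch using the plain driver; the connection string, database, and collection names are example values:

```java
import com.mongodb.client.MongoClient;
import com.mongodb.client.MongoClients;
import org.bson.Document;

final class PartialResultReader {
    // Fetch the latest partial snapshot that upsertPartialResult wrote for a job.
    static Document readPartialResult(String jobId) {
        try (MongoClient client = MongoClients.create("mongodb://localhost:27017")) {
            return client.getDatabase("tls-scans")
                    .getCollection("example-collection")
                    .find(new Document("_id", jobId))
                    .first(); // null if no partial result has been written yet
        }
    }
}
```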
diff --git a/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
new file mode 100644
index 0000000..009ce36
--- /dev/null
+++ b/src/test/java/de/rub/nds/crawler/core/BulkScanWorkerTest.java
@@ -0,0 +1,367 @@
+/*
+ * TLS-Crawler - A TLS scanning tool to perform large scale scans with the TLS-Scanner
+ *
+ * Copyright 2018-2023 Ruhr University Bochum, Paderborn University, and Hackmanit GmbH
+ *
+ * Licensed under Apache License, Version 2.0
+ * http://www.apache.org/licenses/LICENSE-2.0.txt
+ */
+package de.rub.nds.crawler.core;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import de.rub.nds.crawler.constant.JobStatus;
+import de.rub.nds.crawler.data.BulkScan;
+import de.rub.nds.crawler.data.ScanConfig;
+import de.rub.nds.crawler.data.ScanJobDescription;
+import de.rub.nds.crawler.data.ScanResult;
+import de.rub.nds.crawler.data.ScanTarget;
+import de.rub.nds.crawler.dummy.DummyPersistenceProvider;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import org.bson.Document;
+import org.junit.jupiter.api.Test;
+
+class BulkScanWorkerTest {
+
+    // Test implementation of ScanConfig
+    static class TestScanConfig extends ScanConfig implements Serializable {
+        public TestScanConfig() {
+            super(de.rub.nds.scanner.core.config.ScannerDetail.NORMAL, 0, 60);
+        }
+
+        @Override
+        public BulkScanWorker<? extends ScanConfig> createWorker(
+                String bulkScanID, int parallelConnectionThreads, int parallelScanThreads) {
+            return new TestBulkScanWorker(bulkScanID, this, parallelScanThreads);
+        }
+    }
+
+    // Test implementation of BulkScanWorker
+    static class TestBulkScanWorker extends BulkScanWorker<TestScanConfig> {
+        private boolean initCalled = false;
+        private boolean cleanupCalled = false;
+        private ScanJobDescription capturedJobDescription = null;
+
+        TestBulkScanWorker(String bulkScanId, TestScanConfig scanConfig, int parallelScanThreads) {
+            super(bulkScanId, scanConfig, parallelScanThreads);
+        }
+
+        @Override
+        public Document scan(ScanJobDescription jobDescription, ScheduledScan scheduledScan) {
+            // Capture the job description during scan
+            capturedJobDescription = jobDescription;
+            ScanTarget scanTarget = jobDescription.getScanTarget();
+
+            Document result = new Document();
+            result.put("target", scanTarget.getIp());
+            result.put("hasJobDescription", jobDescription != null);
+            if (jobDescription != null) {
+                result.put("jobId", jobDescription.getId().toString());
+            }
+            return result;
+        }
+
+        @Override
+        protected void initInternal() {
+            initCalled = true;
+        }
+
+        @Override
+        protected void cleanupInternal() {
+            cleanupCalled = true;
+        }
+
+        public boolean isInitCalled() {
+            return initCalled;
+        }
+
+        public boolean isCleanupCalled() {
+            return cleanupCalled;
+        }
+
+        public ScanJobDescription getCapturedJobDescription() {
+            return capturedJobDescription;
+        }
+    }
+
+    @Test
+    void testNoJobDescriptionIsCapturedBeforeFirstScan() {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        // The worker only captures a job description while scan() runs, so nothing
+        // has been captured before the first scan.
+        assertNull(
+                worker.getCapturedJobDescription(),
+                "Job description should be null before any scan");
+    }
+
+    @Test
+    void testJobDescriptionIsAvailableDuringScan() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        ScheduledScan scheduledScan = worker.handle(jobDescription);
+        Document result = scheduledScan.getFinalResult().get();
+
+        // Verify the job description was available during scan
+        assertTrue(
+                result.getBoolean("hasJobDescription"),
+                "Job description should be available in scan context");
+        assertEquals(jobDescription.getId().toString(), result.getString("jobId"));
+
+        // Verify the captured job description matches
+        assertNotNull(worker.getCapturedJobDescription());
+        assertEquals(jobDescription.getId(), worker.getCapturedJobDescription().getId());
+        assertEquals(target, worker.getCapturedJobDescription().getScanTarget());
+
+        // Simulate the partial results persistence flow
+        DummyPersistenceProvider persistenceProvider = new DummyPersistenceProvider();
+
+        // Update job status to SUCCESS (required by ScanResult constructor)
+        jobDescription.setStatus(JobStatus.SUCCESS);
+
+        // Create ScanResult from the scan result Document and job description
+        ScanResult scanResult = new ScanResult(jobDescription, result);
+
+        // Verify ScanResult has the correct scanJobDescriptionId
+        assertEquals(
+                jobDescription.getId().toString(),
+                scanResult.getScanJobDescriptionId(),
+                "ScanResult should use job description UUID as scanJobDescriptionId");
+
+        // Simulate persisting to MongoDB
+        persistenceProvider.insertScanResult(scanResult, jobDescription);
+
+        // Simulate retrieving from MongoDB by scanJobDescriptionId
+        ScanResult retrievedResult =
+                persistenceProvider.getScanResultByScanJobDescriptionId(
+                        "test-db", "test-collection", jobDescription.getId().toString());
+
+        // Verify the retrieved result matches
+        assertNotNull(
+                retrievedResult, "Should be able to retrieve ScanResult by job description ID");
+        assertEquals(
+                jobDescription.getId().toString(),
+                retrievedResult.getScanJobDescriptionId(),
+                "Retrieved result should have matching scanJobDescriptionId");
+        assertEquals(
+                scanResult.getBulkScan(),
+                retrievedResult.getBulkScan(),
+                "Retrieved result should have matching bulk scan ID");
+        assertEquals(
+                scanResult.getScanTarget(),
+                retrievedResult.getScanTarget(),
+                "Retrieved result should have matching scan target");
+        assertEquals(
+                scanResult.getResult(),
+                retrievedResult.getResult(),
+                "Retrieved result should have matching result document");
+    }
+
+    @Test
+    void testSubsequentScansCaptureTheirOwnJobDescription() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        // Execute the scan
+        ScheduledScan scheduledScan = worker.handle(jobDescription);
+        scheduledScan.getFinalResult().get(); // Wait for completion
+
+        // After scan completes, verify we can run another scan
+        ScanTarget newTarget = new ScanTarget();
+        newTarget.setIp("192.0.2.2"); // TEST-NET-1 (RFC 5737)
+        newTarget.setPort(443);
+
+        ScanJobDescription newJobDescription =
+                new ScanJobDescription(newTarget, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        ScheduledScan scheduledScan2 = worker.handle(newJobDescription);
+        Document result2 = scheduledScan2.getFinalResult().get();
+
+        // The second scan should have the second job description, not the first
+        assertEquals(newJobDescription.getId().toString(), result2.getString("jobId"));
+        assertEquals(newJobDescription.getId(), worker.getCapturedJobDescription().getId());
+    }
+
+    @Test
+    void testMultipleConcurrentScansHaveSeparateContexts() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 2);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        // Create multiple job descriptions
+        List<ScanJobDescription> jobDescriptions = new ArrayList<>();
+        List<ScheduledScan> scheduledScans = new ArrayList<>();
+
+        for (int i = 0; i < 5; i++) {
+            ScanTarget target = new ScanTarget();
+            target.setIp("192.0.2." + (i + 1)); // TEST-NET-1 (RFC 5737)
+            target.setPort(443);
+
+            ScanJobDescription jobDescription =
+                    new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+            jobDescriptions.add(jobDescription);
+
+            scheduledScans.add(worker.handle(jobDescription));
+        }
+
+        // Wait for all scans to complete and verify each got the correct job description
+        for (int i = 0; i < 5; i++) {
+            Document result = scheduledScans.get(i).getFinalResult().get();
+            assertTrue(result.getBoolean("hasJobDescription"));
+            assertEquals(
+                    jobDescriptions.get(i).getId().toString(),
+                    result.getString("jobId"),
+                    "Scan " + i + " should have its own job description");
+        }
+    }
+
+    @Test
+    void testInitializationIsCalledOnFirstHandle() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        assertFalse(worker.isInitCalled(), "Init should not be called before first handle");
+
+        ScanTarget target = new ScanTarget();
+        target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        ScheduledScan scheduledScan = worker.handle(jobDescription);
+        scheduledScan.getFinalResult().get();
+
+        assertTrue(worker.isInitCalled(), "Init should be called on first handle");
+    }
+
+    @Test
+    void testCleanupIsCalledWhenAllJobsComplete() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        ScanTarget target = new ScanTarget();
+        target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        ScheduledScan scheduledScan = worker.handle(jobDescription);
+        scheduledScan.getFinalResult().get();
+
+        // Give cleanup a moment to execute (it runs after job completion)
+        Thread.sleep(100);
+
+        assertTrue(worker.isCleanupCalled(), "Cleanup should be called when all jobs complete");
+    }
+
+    @Test
+    void testManualInitPreventsSelfCleanup() throws Exception {
+        TestScanConfig config = new TestScanConfig();
+        TestBulkScanWorker worker = new TestBulkScanWorker("test-bulk-id", config, 1);
+
+        // Call init manually
+        worker.init();
+        assertTrue(worker.isInitCalled(), "Init should be called");
+
+        ScanTarget target = new ScanTarget();
+        target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
+        target.setPort(443);
+
+        BulkScan bulkScan =
+                new BulkScan(
+                        BulkScanWorkerTest.class,
+                        BulkScanWorkerTest.class,
+                        "test-db",
+                        config,
+                        System.currentTimeMillis(),
+                        false,
+                        null);
+
+        ScanJobDescription jobDescription =
+                new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);
+
+        ScheduledScan scheduledScan = worker.handle(jobDescription);
+        scheduledScan.getFinalResult().get();
+
+        // Give cleanup a moment (if it were to execute)
+        Thread.sleep(100);
+
+        assertFalse(
+                worker.isCleanupCalled(),
+                "Cleanup should NOT be called when init was manual (shouldCleanupSelf = false)");
+
+        // Cleanup should only be called when we explicitly call it
+        worker.cleanup();
+        assertTrue(worker.isCleanupCalled(), "Cleanup should be called when explicitly called");
+    }
+}
diff --git a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java
index 501b3d4..1bc3e9a 100644
--- a/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java
+++ b/src/test/java/de/rub/nds/crawler/dummy/DummyPersistenceProvider.java
@@ -15,10 +15,12 @@
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
+import org.bson.Document;
 
 public class DummyPersistenceProvider implements IPersistenceProvider {
     public final List<ScanResult> results = new ArrayList<>();
     public final List<BulkScan> bulkScans = new ArrayList<>();
+    public final List<Document> partialResults = new ArrayList<>();
 
     @Override
     public void insertScanResult(ScanResult scanResult, ScanJobDescription job) {
@@ -55,4 +57,9 @@ public ScanResult getScanResultByScanJobDescriptionId(
                     .max((r1, r2) -> r1.getTimestamp().compareTo(r2.getTimestamp()))
                     .orElse(null);
     }
+
+    @Override
+    public void upsertPartialResult(ScanJobDescription job, Document partialResult) {
+        partialResults.add(partialResult);
+    }
 }
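Finally, a hypothetical test sketch showing how the new hook can be exercised against the dummy provider. It would live inside BulkScanWorkerTest (reusing TestScanConfig and TestBulkScanWorker from above) and relies on persistPartialResult being protected yet visible from the same package:

```java
@Test
void partialResultsReachTheProvider() {
    DummyPersistenceProvider provider = new DummyPersistenceProvider();
    TestScanConfig config = new TestScanConfig();
    TestBulkScanWorker worker = new TestBulkScanWorker("bulk-id", config, 1);
    worker.setPersistenceProvider(provider);

    ScanTarget target = new ScanTarget();
    target.setIp("192.0.2.1"); // TEST-NET-1 (RFC 5737)
    target.setPort(443);
    BulkScan bulkScan =
            new BulkScan(
                    BulkScanWorkerTest.class,
                    BulkScanWorkerTest.class,
                    "test-db",
                    config,
                    System.currentTimeMillis(),
                    false,
                    null);
    ScanJobDescription job = new ScanJobDescription(target, bulkScan, JobStatus.TO_BE_EXECUTED);

    // The dummy provider records every upsert instead of replacing by job ID,
    // so two snapshots yield two entries.
    worker.persistPartialResult(job, new Document("step", 1));
    worker.persistPartialResult(job, new Document("step", 2));

    assertEquals(2, provider.partialResults.size());
    assertEquals(new Document("step", 2), provider.partialResults.get(1));
}
```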