Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/de/rub/nds/crawler/constant/JobStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
public enum JobStatus {
/** Job is waiting to be executed. */
TO_BE_EXECUTED(false),
/** Job is currently being executed. Partial results may be available in DB. */
RUNNING(false),
/** The domain was not resolvable. An empty result was written to DB. */
UNRESOLVABLE(true),
/** An uncaught exception occurred while resolving the host. */
Expand Down
30 changes: 25 additions & 5 deletions src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package de.rub.nds.crawler.core;

import de.rub.nds.crawler.data.ScanConfig;
import de.rub.nds.crawler.data.ScanJobDescription;
import de.rub.nds.crawler.data.ScanTarget;
import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
import de.rub.nds.scanner.core.execution.NamedThreadFactory;
Expand Down Expand Up @@ -41,6 +42,10 @@ public abstract class BulkScanWorker<T extends ScanConfig> {
/** The scan configuration for this worker */
protected final T scanConfig;

// Per-thread slot holding the ScanJobDescription of the scan currently running on that thread.
// handle() sets it on the executor thread right before calling scan() and removes it in a
// finally block, so scan() implementations can read it through getCurrentJobDescription()
// without the scan() signature having to change.
// NOTE(review): the field is static, so the slot is shared by every BulkScanWorker instance;
// values are only separated per thread. Presumably one thread never runs two scans at once --
// confirm against the executor setup.
private static final ThreadLocal<ScanJobDescription> currentJobDescription =
new ThreadLocal<>();

/**
* Calls the inner scan function and may handle cleanup. This is needed to wrap the scanner into
* a future object such that we can handle timeouts properly.
Expand Down Expand Up @@ -75,22 +80,37 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
* initialize itself. In this case it will also clean up itself if all jobs are done.
*
* @param scanTarget The target to scan.
* @param jobDescription The job description for this scan.
* @return A future that resolves to the scan result once the scan is done.
*/
// NOTE(review): this span is rendered from a diff view without +/- markers. The one-argument
// signature and the un-wrapped scan/cleanup/return lines appear to be the pre-change version;
// the two-argument signature and the try/finally body appear to be the post-change version.
public Future<Document> handle(ScanTarget scanTarget) {
public Future<Document> handle(ScanTarget scanTarget, ScanJobDescription jobDescription) {
// if we initialized ourselves, we also clean up ourselves
// NOTE(review): init() is always evaluated here; its result is stored only if the flag is
// still false. The weak compare-and-set variants are documented as allowed to fail
// spuriously, in which case the flag would stay false even though init() returned true --
// consider a strong compareAndSet. (Assumes shouldCleanupSelf is an atomic boolean declared
// outside this view -- TODO confirm.)
shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
// Count the job as active before it is submitted, so the "last job done" check inside the
// task includes it.
activeJobs.incrementAndGet();
return timeoutExecutor.submit(
() -> {
Document result = scan(scanTarget);
if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
cleanup();
try {
// Publish the job description on the thread that runs the task so that scan()
// can read it via getCurrentJobDescription().
currentJobDescription.set(jobDescription);
// NOTE(review): if scan() throws, the decrementAndGet() below is skipped, so activeJobs
// would never reach 0 through this path and the self-cleanup would not run -- consider
// moving the decrement/cleanup into the finally block.
Document result = scan(scanTarget);
// The task that brings the active-job counter to zero performs the cleanup, but only
// if this worker was the one that initialized itself.
if (activeJobs.decrementAndGet() == 0 && shouldCleanupSelf.get()) {
cleanup();
}
return result;
} finally {
// Always clear the slot so the job description is not left behind on the executor
// thread once this task is done.
currentJobDescription.remove();
}
return result;
});
}

/**
* Returns the job description that {@code handle(...)} bound to the calling thread for the scan
* that is currently in progress. The value is set immediately before {@code scan()} is invoked
* and removed once the task finishes, so it is only meaningful while {@code scan()} is running.
*
* <p>The value is kept in a plain {@link ThreadLocal}: it is visible only on the thread that
* executes the scan task. Any other thread calling this method receives {@code null}.
*
* @return the ScanJobDescription bound to the calling thread, or null if no scan is active on it
*/
protected ScanJobDescription getCurrentJobDescription() {
final ScanJobDescription boundDescription = currentJobDescription.get();
return boundDescription;
}

/**
* Scans a target and returns the result as a Document. This is the core scanning functionality
* that must be implemented by subclasses.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,6 @@ public Future<Document> handle(
bulkScanInfo.getScanConfig(),
parallelConnectionThreads,
parallelScanThreads);
return worker.handle(scanJobDescription.getScanTarget());
return worker.handle(scanJobDescription.getScanTarget(), scanJobDescription);
}
}