diff --git a/README.md b/README.md index f1a761c..7611af1 100644 --- a/README.md +++ b/README.md @@ -39,3 +39,27 @@ make ## Usage For comprehensive help, use `dooked --help` + +### History tracking + +When a previous JSON result is used as input, dooked now keeps DNS records that +were seen in earlier runs and annotates each record with: + +- `first-seen`: UTC time the record first appeared in the dataset +- `last-seen`: UTC time the record was most recently observed +- `seen`: number of runs where the record was observed + +This helps with load-balanced targets where valid records rotate between runs. +The following flags surface the history: + +```sh +dooked -i previous.json --fs +dooked -i previous.json --ls 2 +dooked -i previous.json --lsd 05/31/2026 +``` + +- `--fs` reports records first seen during the current run. +- `--ls <days>` reports records missing from the current run and not seen for + at least that many days. +- `--lsd <date>` reports records missing from the current run and last + seen on or before that date. 
diff --git a/dooked/include/cli_preprocessor.hpp b/dooked/include/cli_preprocessor.hpp index 43fa1ba..2f1d5a6 100644 --- a/dooked/include/cli_preprocessor.hpp +++ b/dooked/include/cli_preprocessor.hpp @@ -19,12 +19,15 @@ struct cli_args_t { std::string resolver_filename{}; std::string output_filename{}; std::string input_filename{}; + std::string last_seen_date{}; int file_type{}; int post_http_request{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; bool include_date{false}; + bool first_seen_alert{false}; }; struct runtime_args_t { @@ -36,6 +39,9 @@ struct runtime_args_t { http_process_e http_request_time_{}; int thread_count{}; int content_length{-1}; + int last_seen_days{-1}; + std::string last_seen_date{}; + bool first_seen_alert{false}; }; void run_program(cli_args_t const &cli_args); diff --git a/dooked/include/utils/io_utils.hpp b/dooked/include/utils/io_utils.hpp index 829b09e..1a6e0d8 100644 --- a/dooked/include/utils/io_utils.hpp +++ b/dooked/include/utils/io_utils.hpp @@ -26,9 +26,12 @@ void trim(std::string &); struct json_data_t { std::string domain_name{}; std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; int ttl{}; int http_code{}; int content_length{}; + int seen{}; dns_record_type_e type{}; static json_data_t serialize(std::string const &d, int const len, @@ -40,6 +43,19 @@ struct json_data_t { dns_str_to_record_type(json_object["type"].get<std::string>()); data.rdata = json_object["info"].get<std::string>(); data.ttl = json_object["ttl"].get<int>(); + if (json_object.count("first-seen") != 0) { + data.first_seen = json_object["first-seen"].get<std::string>(); + } else if (json_object.count("first_seen") != 0) { + data.first_seen = json_object["first_seen"].get<std::string>(); + } + if (json_object.count("last-seen") != 0) { + data.last_seen = json_object["last-seen"].get<std::string>(); + } else if (json_object.count("last_seen") != 0) { + data.last_seen = json_object["last_seen"].get<std::string>(); + } + if (json_object.count("seen") != 0) { + data.seen = 
json_object["seen"].get<int>(); + } data.content_length = len; data.http_code = http_code; return data; diff --git a/dooked/include/utils/probe_result.hpp b/dooked/include/utils/probe_result.hpp index 07211c6..41ef435 100644 --- a/dooked/include/utils/probe_result.hpp +++ b/dooked/include/utils/probe_result.hpp @@ -10,8 +10,11 @@ bool case_insensitive_compare(std::string const &, std::string const &); struct probe_result_t { std::string rdata{}; + std::string first_seen{}; + std::string last_seen{}; dns_record_type_e type{}; // RR TYPE (2 octets) std::uint32_t ttl{}; // time to live(4 octets) + int seen{0}; friend bool operator==(probe_result_t const &a, probe_result_t const &b) { return case_insensitive_compare(a.rdata, b.rdata) && (a.type == b.type); diff --git a/dooked/include/utils/random_utils.hpp b/dooked/include/utils/random_utils.hpp index 0e87be2..88c2f69 100644 --- a/dooked/include/utils/random_utils.hpp +++ b/dooked/include/utils/random_utils.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace dooked { diff --git a/dooked/source/cli_preprocessor.cpp b/dooked/source/cli_preprocessor.cpp index c08d7fb..04ca005 100644 --- a/dooked/source/cli_preprocessor.cpp +++ b/dooked/source/cli_preprocessor.cpp @@ -4,10 +4,14 @@ #include "utils/exceptions.hpp" #include "utils/random_utils.hpp" #include "utils/string_utils.hpp" +#include #include #include +#include +#include #include #include +#include // defined (and assigned to) in main.cpp extern bool silent; @@ -18,6 +22,188 @@ namespace dooked { namespace net = boost::asio; using namespace fmt::v7::literals; +std::string utc_time_to_string(std::time_t const timestamp) { + std::tm tm{}; +#ifdef _WIN32 + gmtime_s(&tm, &timestamp); +#else + gmtime_r(&timestamp, &tm); +#endif + std::ostringstream out{}; + out << std::put_time(&tm, "%Y-%m-%dT%H:%M:%SZ"); + return out.str(); +} + +std::optional<std::time_t> parse_utc_time(std::string const &value) { + if (value.empty()) { + return std::nullopt; + } + std::tm tm{}; + 
std::istringstream in{value}; + in >> std::get_time(&tm, "%Y-%m-%dT%H:%M:%SZ"); + if (in.fail()) { + return std::nullopt; + } +#ifdef _WIN32 + return _mkgmtime(&tm); +#else + return timegm(&tm); +#endif +} + +std::optional<std::time_t> parse_us_date(std::string const &value) { + if (value.empty()) { + return std::nullopt; + } + + for (auto const *format : {"%m/%d/%Y %H:%M:%S", "%m/%d/%Y"}) { /* date+time first: the date-only pattern also matches its prefix and would drop the time */ + std::tm tm{}; + std::istringstream in{value}; + in >> std::get_time(&tm, format); + if (!in.fail()) { +#ifdef _WIN32 + return _mkgmtime(&tm); +#else + return timegm(&tm); +#endif + } + } + return std::nullopt; +} + +bool same_record(std::string const &domain_name, probe_result_t const &record, + json_data_t const &previous) { + return domain_name == previous.domain_name && record.type == previous.type && + case_insensitive_compare(record.rdata, previous.rdata); +} + +bool current_result_contains(map_container_t const &result_map, + json_data_t const &previous) { + auto const &current_map = result_map.cresult(); + auto const current_domain_iter = current_map.find(previous.domain_name); + if (current_domain_iter == current_map.end()) { + return false; + } + auto const &records = current_domain_iter->second.dns_result_list_; + return std::any_of(records.cbegin(), records.cend(), + [&previous](auto const &record) { + return record.type == previous.type && + case_insensitive_compare(record.rdata, + previous.rdata); + }); +} + +probe_result_t preserved_record(json_data_t const &previous, + std::string const &now) { + probe_result_t record{}; + record.rdata = previous.rdata; + record.first_seen = previous.first_seen.empty() ? now : previous.first_seen; + record.last_seen = previous.last_seen.empty() ? now : previous.last_seen; + record.type = previous.type; + record.ttl = static_cast<std::uint32_t>(previous.ttl); + record.seen = previous.seen > 0 ? 
previous.seen : 1; + return record; +} + +void report_last_seen_alerts(std::vector<json_data_t> const &previous_result, + map_container_t const &result_map, + runtime_args_t const &rt_args, + std::time_t const now_time) { + if (rt_args.last_seen_days < 0 && rt_args.last_seen_date.empty()) { + return; + } + + std::optional<std::time_t> threshold{}; + if (rt_args.last_seen_days >= 0) { + threshold = now_time - (static_cast<std::time_t>(rt_args.last_seen_days) * 24 * 60 * 60); + } else { + threshold = parse_us_date(rt_args.last_seen_date); + if (!threshold) { + spdlog::warn("unable to parse --lsd date `{}`; expected MM/DD/YYYY", + rt_args.last_seen_date); + return; + } + } + + for (auto const &previous : previous_result) { + if (current_result_contains(result_map, previous)) { + continue; + } + auto const last_seen = + parse_utc_time(previous.last_seen).value_or(now_time); + if (last_seen <= *threshold) { + spdlog::warn("[LAST-SEEN][{}][{}] `{}` last seen {}", previous.domain_name, + dns_record_type_to_str(previous.type), previous.rdata, + previous.last_seen.empty() ? "unknown" : previous.last_seen); + } + } +} + +void apply_history_metadata(std::vector<json_data_t> const &previous_result, + map_container_t &result_map, + runtime_args_t const &rt_args, + std::time_t const now_time) { + auto const now = utc_time_to_string(now_time); + + for (auto &result_pair : result_map.result()) { + auto const &domain_name = result_pair.first; + for (auto &record : result_pair.second.dns_result_list_) { + auto const previous_iter = + std::find_if(previous_result.cbegin(), previous_result.cend(), + [&domain_name, &record](auto const &previous) { + return same_record(domain_name, record, previous); + }); + if (previous_iter != previous_result.cend()) { + record.first_seen = previous_iter->first_seen.empty() + ? now + : previous_iter->first_seen; + record.last_seen = now; + record.seen = (previous_iter->seen > 0 ? 
previous_iter->seen : 1) + 1; + } else { + record.first_seen = now; + record.last_seen = now; + record.seen = 1; + if (rt_args.first_seen_alert) { + spdlog::info("[FIRST-SEEN][{}][{}] `{}`", domain_name, + dns_record_type_to_str(record.type), record.rdata); + } + } + } + } + + for (auto const &previous : previous_result) { + if (current_result_contains(result_map, previous)) { + continue; + } + bool const domain_exists = + result_map.cresult().find(previous.domain_name) != + result_map.cresult().end(); + result_map.append(previous.domain_name, preserved_record(previous, now)); + if (!domain_exists) { + result_map.insert(previous.domain_name, previous.content_length, + previous.http_code); + } + } +} + +void initialize_history_metadata(map_container_t &result_map, + runtime_args_t const &rt_args, + std::time_t const now_time) { + auto const now = utc_time_to_string(now_time); + for (auto &result_pair : result_map.result()) { + auto const &domain_name = result_pair.first; + for (auto &record : result_pair.second.dns_result_list_) { + record.first_seen = now; + record.last_seen = now; + record.seen = 1; + if (rt_args.first_seen_alert) { + spdlog::info("[FIRST-SEEN][{}][{}] `{}`", domain_name, + dns_record_type_to_str(record.type), record.rdata); + } + } + } +} + void compare_http_result(int const base_cl, json_data_t const &prev_http_result, http_response_t const &current_result) { auto const current_req_cl = current_result.content_length_; @@ -350,10 +536,7 @@ void start_name_checking(runtime_args_t &&rt_args) { } thread_pool->join(); } - if (!silent) { - spdlog::info("Writing JSON output"); - } - write_json_result(result_map, rt_args); + auto const now = std::time(nullptr); // compare old with new result -- only if we had previous record if (rt_args.previous_data) { @@ -373,9 +556,18 @@ void start_name_checking(runtime_args_t &&rt_args) { return std::tie(a.type, a.rdata) < std::tie(b.type, b.rdata); }); } - return compare_results(*rt_args.previous_data, result_map, - 
rt_args.content_length); + compare_results(*rt_args.previous_data, result_map, + rt_args.content_length); + report_last_seen_alerts(previous_data, result_map, rt_args, now); + apply_history_metadata(previous_data, result_map, rt_args, now); + } else { + initialize_history_metadata(result_map, rt_args, now); } + + if (!silent) { + spdlog::info("Writing JSON output"); + } + write_json_result(result_map, rt_args); } void run_program(cli_args_t const &cli_args) { @@ -477,6 +669,9 @@ void run_program(cli_args_t const &cli_args) { static_cast(cli_args.post_http_request); rt_args.thread_count = cli_args.thread_count; rt_args.content_length = cli_args.content_length; + rt_args.first_seen_alert = cli_args.first_seen_alert; + rt_args.last_seen_days = cli_args.last_seen_days; + rt_args.last_seen_date = cli_args.last_seen_date; return start_name_checking(std::move(rt_args)); } diff --git a/dooked/source/http/requests_handler.cpp b/dooked/source/http/requests_handler.cpp index d21a592..322c3c9 100644 --- a/dooked/source/http/requests_handler.cpp +++ b/dooked/source/http/requests_handler.cpp @@ -10,6 +10,10 @@ extern bool silent; namespace dooked { +std::string header_value_to_string(beast::string_view const value) { + return {value.data(), value.size()}; +} + http_request_handler_t::http_request_handler_t(net::io_context &io_context, std::string domain_name) : io_{io_context}, domain_{std::move(domain_name)} {} @@ -139,7 +143,8 @@ void http_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; } else if (status_code_simple == 3) { // redirected - response_string = (*response_)[http::field::location].to_string(); + response_string = + header_value_to_string((*response_)[http::field::location]); if (response_string.empty()) { response_int = response_type_e::unknown_response; } else { @@ -171,7 +176,8 @@ void http_request_handler_t::on_data_received( int content_length{}; if (response_->has_content_length()) { try { - auto const 
cl_str = (*response_)[http::field::content_length].to_string(); + auto const cl_str = + header_value_to_string((*response_)[http::field::content_length]); content_length = std::stoi(cl_str); } catch (std::exception const &) { } @@ -365,7 +371,8 @@ void https_request_handler_t::on_data_received( if (status_code_simple == 2) { response_int = response_type_e::ok; } else if (status_code_simple == 3) { // redirected - response_string = (*response_)[http::field::location].to_string(); + response_string = + header_value_to_string((*response_)[http::field::location]); if (response_string.empty()) { response_int = response_type_e::unknown_response; } else { @@ -392,7 +399,8 @@ void https_request_handler_t::on_data_received( int content_length = 0; if (response_->has_content_length()) { try { - auto const cl_str = (*response_)[http::field::content_length].to_string(); + auto const cl_str = + header_value_to_string((*response_)[http::field::content_length]); content_length = std::stoi(cl_str); } catch (std::exception const &) { } diff --git a/dooked/source/main.cpp b/dooked/source/main.cpp index cf29460..cf92098 100644 --- a/dooked/source/main.cpp +++ b/dooked/source/main.cpp @@ -41,6 +41,14 @@ int main(int argc, char **argv) { "defers http request until after all DNS requests have been completed"); app.add_flag("--compare-cl", compare_cl, "compare content-length of HTTP requests"); + app.add_flag("--fs", cli_args.first_seen_alert, + "show records first seen during this run"); + app.add_option("--ls", cli_args.last_seen_days, + "show records missing from this run and not seen for this " + "many days"); + app.add_option("--lsd", cli_args.last_seen_date, + "show records missing from this run and not seen since a US " + "date such as MM/DD/YYYY"); app.add_flag("--nbc", no_bytes_count, "in case `content-length` is missing in an HTTP header field," diff --git a/dooked/source/utils/io_utils.cpp b/dooked/source/utils/io_utils.cpp index a1bd5d3..01541d5 100644 --- 
a/dooked/source/utils/io_utils.cpp +++ b/dooked/source/utils/io_utils.cpp @@ -6,6 +6,15 @@ void to_json(json &j, probe_result_t const &record) { j = json{{"ttl", record.ttl}, {"type", dns_record_type_to_str(record.type)}, {"info", record.rdata}}; + if (!record.first_seen.empty()) { + j["first-seen"] = record.first_seen; + } + if (!record.last_seen.empty()) { + j["last-seen"] = record.last_seen; + } + if (record.seen > 0) { + j["seen"] = record.seen; + } } bool is_text_file(std::string const &file_extension) {