From 1853e0c14fab7819f833f33f3990e8c4ccec2e81 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 17 Apr 2025 16:51:48 -0700 Subject: [PATCH 01/21] ESQL and DSL executors are introduced. param can accept ES|QL query shape now. is introduced for initial step but needs team's feedback. DSL logics moved into DSL executors. --- lib/logstash/filters/elasticsearch.rb | 124 ++++-------------- lib/logstash/filters/elasticsearch/client.rb | 4 +- .../filters/elasticsearch/dsl_executor.rb | 121 +++++++++++++++++ .../filters/elasticsearch/esql_executor.rb | 77 +++++++++++ 4 files changed, 227 insertions(+), 99 deletions(-) create mode 100644 lib/logstash/filters/elasticsearch/dsl_executor.rb create mode 100644 lib/logstash/filters/elasticsearch/esql_executor.rb diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index f4335ab..ac422c6 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -12,6 +12,9 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base + require 'logstash/filters/elasticsearch/dsl_executor' + require 'logstash/filters/elasticsearch/esql_executor' + include LogStash::PluginMixins::ECSCompatibilitySupport include LogStash::PluginMixins::ECSCompatibilitySupport::TargetCheck @@ -24,8 +27,13 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # Field substitution (e.g. `index-name-%{date_field}`) is available config :index, :validate => :string, :default => "" - # Elasticsearch query string. Read the Elasticsearch query string documentation. - # for more info at: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax + # Query mode to define what query style/syntax is used with @query param. + config :query_mode, :validate => %w[esql dsl], :default => "dsl" + + # Elasticsearch query string. This can be in DSL or ES|QL query shape. + # Read the Elasticsearch query string documentation. 
+ # DSL: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax + # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html config :query, :validate => :string # File path to elasticsearch query in DSL format. Read the Elasticsearch query documentation @@ -134,6 +142,11 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # What status codes to retry on? config :retry_on_status, :validate => :number, :list => true, :default => [500, 502, 503, 504] + # params to send to ES|QL query, naming params preferred + # example, + # if query is "FROM my-index | WHERE some_type = ?type" + # esql_params => [{"type": "@type_field"}] + config :esql_params, :validate => :array, :default => [] config :ssl, :obsolete => "Set 'ssl_enabled' instead." config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." @@ -186,73 +199,23 @@ def register if get_client.es_transport_client_type == "elasticsearch_transport" require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" end - end # def register - - def filter(event) - matched = false - begin - params = { :index => event.sprintf(@index) } - - if @query_dsl - query = LogStash::Json.load(event.sprintf(@query_dsl)) - params[:body] = query - else - query = event.sprintf(@query) - params[:q] = query - params[:size] = result_size - params[:sort] = @sort if @enable_sort - end - @logger.debug("Querying elasticsearch for lookup", :params => params) - - results = get_client.search(params) - raise "Elasticsearch query error: #{results["_shards"]["failures"]}" if results["_shards"].include? "failures" - - event.set("[@metadata][total_hits]", extract_total_from_hits(results['hits'])) - - resultsHits = results["hits"]["hits"] - if !resultsHits.nil? && !resultsHits.empty? 
- matched = true - @fields.each do |old_key, new_key| - old_key_path = extract_path(old_key) - extracted_hit_values = resultsHits.map do |doc| - extract_value(doc["_source"], old_key_path) - end - value_to_set = extracted_hit_values.count > 1 ? extracted_hit_values : extracted_hit_values.first - set_to_event_target(event, new_key, value_to_set) - end - @docinfo_fields.each do |old_key, new_key| - old_key_path = extract_path(old_key) - extracted_docs_info = resultsHits.map do |doc| - extract_value(doc, old_key_path) - end - value_to_set = extracted_docs_info.count > 1 ? extracted_docs_info : extracted_docs_info.first - set_to_event_target(event, new_key, value_to_set) - end - end - - resultsAggs = results["aggregations"] - if !resultsAggs.nil? && !resultsAggs.empty? - matched = true - @aggregation_fields.each do |agg_name, ls_field| - set_to_event_target(event, ls_field, resultsAggs[agg_name]) - end - end - - rescue => e - if @logger.trace? - @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :query => query, :event => event.to_hash, :error => e.message, :backtrace => e.backtrace) - elsif @logger.debug? 
- @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message, :backtrace => e.backtrace) - else - @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message) - end - @tag_on_failure.each{|tag| event.tag(tag)} + if @query_mode == "esql" + @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger) else - filter_matched(event) if matched + @esql_executor ||= LogStash::Filters::Elasticsearch::DslExecutor.new(self, @logger) end + end # def register + + def filter(event) + @esql_executor.process(get_client, event) end # def filter + def decorate(event) + # Elasticsearch class has an access for `filter_matched` + filter_matched(event) + end + # public only to be reuse in testing def prepare_user_agent os_name = java.lang.System.getProperty('os.name') @@ -376,39 +339,6 @@ def get_client end end - # get an array of path elements from a path reference - def extract_path(path_reference) - return [path_reference] unless path_reference.start_with?('[') && path_reference.end_with?(']') - - path_reference[1...-1].split('][') - end - - # given a Hash and an array of path fragments, returns the value at the path - # @param source [Hash{String=>Object}] - # @param path [Array{String}] - # @return [Object] - def extract_value(source, path) - path.reduce(source) do |memo, old_key_fragment| - break unless memo.include?(old_key_fragment) - memo[old_key_fragment] - end - end - - # Given a "hits" object from an Elasticsearch response, return the total number of hits in - # the result set. - # @param hits [Hash{String=>Object}] - # @return [Integer] - def extract_total_from_hits(hits) - total = hits['total'] - - # Elasticsearch 7.x produces an object containing `value` and `relation` in order - # to enable unambiguous reporting when the total is only a lower bound; if we get - # an object back, return its `value`. 
- return total['value'] if total.kind_of?(Hash) - - total - end - def hosts_default?(hosts) hosts.is_a?(Array) && hosts.size == 1 && !original_params.key?('hosts') end diff --git a/lib/logstash/filters/elasticsearch/client.rb b/lib/logstash/filters/elasticsearch/client.rb index 120d8e5..b42d0ec 100644 --- a/lib/logstash/filters/elasticsearch/client.rb +++ b/lib/logstash/filters/elasticsearch/client.rb @@ -55,8 +55,8 @@ def initialize(logger, hosts, options = {}) @client = ::Elasticsearch::Client.new(client_options) end - def search(params={}) - @client.search(params) + def search(params={}, query_type = 'dsl') + query_type == 'esql' ? @client.esql.query(params) : @client.search(params) end def info diff --git a/lib/logstash/filters/elasticsearch/dsl_executor.rb b/lib/logstash/filters/elasticsearch/dsl_executor.rb new file mode 100644 index 0000000..eb92a12 --- /dev/null +++ b/lib/logstash/filters/elasticsearch/dsl_executor.rb @@ -0,0 +1,121 @@ +# encoding: utf-8 + +module LogStash + module Filters + class Elasticsearch + class DslExecutor + def initialize(plugin, logger) + @plugin = plugin + @index = plugin.params["index"] + @query = plugin.params["query"] + @fields = plugin.params["fields"] + @result_size = plugin.params["result_size"] + @docinfo_fields = plugin.params["docinfo_fields"] + @tag_on_failure = plugin.params["tag_on_failure"] + @query_dsl = plugin.params["query_dsl"] + @enable_sort = plugin.params["enable_sort"] + @sort = plugin.params["sort"] + @logger = logger + end + + def process(client, event) + matched = false + begin + params = { :index => event.sprintf(@index) } + + if @query_dsl + query = LogStash::Json.load(event.sprintf(@query_dsl)) + params[:body] = query + else + query = event.sprintf(@query) + params[:q] = query + params[:size] = @result_size + params[:sort] = @sort if @enable_sort + end + + @logger.debug("Querying elasticsearch for lookup", :params => params) + + results = client.search(params) + raise "Elasticsearch query error: 
#{results["_shards"]["failures"]}" if results["_shards"].include? "failures" + + event.set("[@metadata][total_hits]", extract_total_from_hits(results['hits'])) + + result_hits = results["hits"]["hits"] + if !result_hits.nil? && !result_hits.empty? + matched = true + @fields.each do |old_key, new_key| + old_key_path = extract_path(old_key) + set = result_hits.map do |doc| + extract_value(doc["_source"], old_key_path) + end + event.set(new_key, set.count > 1 ? set : set.first) + end + @docinfo_fields.each do |old_key, new_key| + old_key_path = extract_path(old_key) + set = result_hits.map do |doc| + extract_value(doc, old_key_path) + end + event.set(new_key, set.count > 1 ? set : set.first) + end + end + + result_aggregations = results["aggregations"] + if !result_aggregations.nil? && !result_aggregations.empty? + matched = true + @aggregation_fields.each do |agg_name, ls_field| + event.set(ls_field, result_aggregations[agg_name]) + end + end + + rescue => e + if @logger.trace? + @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :query => @query, :event => event.to_hash, :error => e.message, :backtrace => e.backtrace) + elsif @logger.debug? + @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message, :backtrace => e.backtrace) + else + @logger.warn("Failed to query elasticsearch for previous event", :index => @index, :error => e.message) + end + @tag_on_failure.each { |tag| event.tag(tag) } + else + @plugin.decorate(event) if matched + end + end + + private + + # Given a "hits" object from an Elasticsearch response, return the total number of hits in + # the result set. 
+ # @param hits [Hash{String=>Object}] + # @return [Integer] + def extract_total_from_hits(hits) + total = hits['total'] + + # Elasticsearch 7.x produces an object containing `value` and `relation` in order + # to enable unambiguous reporting when the total is only a lower bound; if we get + # an object back, return its `value`. + return total['value'] if total.kind_of?(Hash) + total + end + + # get an array of path elements from a path reference + def extract_path(path_reference) + return [path_reference] unless path_reference.start_with?('[') && path_reference.end_with?(']') + + path_reference[1...-1].split('][') + end + + # given a Hash and an array of path fragments, returns the value at the path + # @param source [Hash{String=>Object}] + # @param path [Array{String}] + # @return [Object] + def extract_value(source, path) + path.reduce(source) do |memo, old_key_fragment| + break unless memo.include?(old_key_fragment) + memo[old_key_fragment] + end + end + + end + end + end +end \ No newline at end of file diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb new file mode 100644 index 0000000..9bf364c --- /dev/null +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -0,0 +1,77 @@ +# encoding: utf-8 + +module LogStash + module Filters + class Elasticsearch + class EsqlExecutor + + def initialize(plugin, logger) + @plugin = plugin + @esql_params = plugin.params["esql_params"] + @query = plugin.params["query"] + @fields = plugin.params["fields"] + @tag_on_failure = plugin.params["tag_on_failure"] + @logger = logger + end + + def process(client, event) + resolved_params = @esql_params&.any? ? 
resolve_parameters(event) : [] + response = execute_query(client, resolved_params) + inform_warning(response) + process_response(event, response) + @plugin.decorate(event) + rescue => e + @logger.error("Failed to process ES|QL filter", exception: e) + @tag_on_failure.each { |tag| event.tag(tag) } + end + + private + + def resolve_parameters(event) + @esql_params.map do |entry| + entry.each_with_object({}) do |(key, value), new_entry| + begin + new_entry[key] = event.sprintf(value) + rescue => e + @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) + raise + end + end + end + end + + def execute_query(client, params) + @logger.debug("Executing ES|QL query", query: @query, params: params) + client.search({ body: { query: @query, params: params }, format: 'json' }, 'esql') + end + + def process_response(event, response) + return unless response['values'] && response['columns'] + + event.set("[@metadata][total_hits]", response['values'].size) + add_requested_fields(event, response) + end + + def inform_warning(response) + return unless (warning = response&.headers&.dig('warning')) + @logger.warn("ES|QL query execution warning: ", { message: warning }) + end + + def handle_errors(response) + return unless response&.headers&.dig("warning") + @logger.warn("ES|QL query execution warning: ", message: response.headers['warning']) + end + + def add_requested_fields(event, response) + @fields.each do |old_key, new_key| + column_index = response['columns'].find_index { |col| col['name'] == old_key } + next unless column_index + + values = response['values'].map { |entry| entry[column_index] } + event.set(new_key, values.one? ? 
values.first : values) if values&.size > 0 + end + end + end + end + end +end From b7407710685e97110a195f6c30322063c8272963 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 21 Apr 2025 10:48:08 -0700 Subject: [PATCH 02/21] Clean up and add unit tests --- lib/logstash/filters/elasticsearch.rb | 87 +++++++++++---- .../filters/elasticsearch/dsl_executor.rb | 3 +- .../filters/elasticsearch/esql_executor.rb | 7 +- spec/filters/elasticsearch_esql_spec.rb | 103 ++++++++++++++++++ 4 files changed, 169 insertions(+), 31 deletions(-) create mode 100644 spec/filters/elasticsearch_esql_spec.rb diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index ac422c6..c797393 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -159,6 +159,9 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base include MonitorMixin attr_reader :shared_client + LS_ESQL_SUPPORT_VERSION = "8.17.4" # the version started using elasticsearch-ruby v8 + ES_ESQL_SUPPORT_VERSION = "8.11.0" + ## # @override to handle proxy => '' as if none was set # @param value [Array] @@ -176,17 +179,20 @@ def self.validate_value(value, validator) return super(value, :uri) end + attr_reader :query_dsl + def register - #Load query if it exists - if @query_template - if File.zero?(@query_template) - raise "template is empty" - end - file = File.open(@query_template, 'r') - @query_dsl = file.read + case @query_mode + when "esql" + validate_ls_version_for_esql_support! + validate_esql_query! + inform_ineffective_esql_params + @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger) + else # dsl + validate_dsl_query_settings! + @esql_executor ||= LogStash::Filters::Elasticsearch::DslExecutor.new(self, @logger) end - validate_query_settings fill_hosts_from_cloud_id setup_ssl_params! 
validate_authentication @@ -195,16 +201,11 @@ def register @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s test_connection! + validate_es_for_esql_support! setup_serverless if get_client.es_transport_client_type == "elasticsearch_transport" require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" end - - if @query_mode == "esql" - @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger) - else - @esql_executor ||= LogStash::Filters::Elasticsearch::DslExecutor.new(self, @logger) - end end # def register def filter(event) @@ -343,16 +344,6 @@ def hosts_default?(hosts) hosts.is_a?(Array) && hosts.size == 1 && !original_params.key?('hosts') end - def validate_query_settings - unless @query || @query_template - raise LogStash::ConfigurationError, "Both `query` and `query_template` are empty. Require either `query` or `query_template`." - end - - if @query && @query_template - raise LogStash::ConfigurationError, "Both `query` and `query_template` are set. Use either `query` or `query_template`." - end - end - def validate_authentication authn_options = 0 authn_options += 1 if @cloud_auth @@ -444,4 +435,52 @@ def setup_ssl_params! params['ssl_enabled'] = @ssl_enabled ||= Array(@hosts).all? { |host| host && host.to_s.start_with?("https") } end + def validate_dsl_query_settings! + #Load query if it exists + if @query_template + if File.zero?(@query_template) + raise "template is empty" + end + file = File.open(@query_template, 'r') + @query_dsl = file.read + end + + validate_query_settings + end + + def validate_query_settings + unless @query || @query_template + raise LogStash::ConfigurationError, "Both `query` and `query_template` are empty. Require either `query` or `query_template`." + end + + if @query && @query_template + raise LogStash::ConfigurationError, "Both `query` and `query_template` are set. Use either `query` or `query_template`." 
+ end + end + + def validate_ls_version_for_esql_support! + if Gem::Version.create(LOGSTASH_VERSION) < Gem::Version.create(LS_ESQL_SUPPORT_VERSION) + fail("Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{LS_ESQL_SUPPORT_VERSION}") + end + end + + def validate_esql_query! + fail(LogStash::ConfigurationError, "`query` cannot be empty") if @query.strip.empty? + source_commands = %w[FROM ROW SHOW] + contains_source_command = source_commands.any? { |source_command| @query.strip.start_with?(source_command) } + fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command + end + + def inform_ineffective_esql_params + ineffective_options = original_params.keys & %w(index target size slices search_api, docinfo, docinfo_target, docinfo_fields) + @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1 + end + + def validate_es_for_esql_support! + return unless @query_mode == 'esql' + # make sure connected ES supports ES|QL (8.11+) + es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) + fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. 
ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql + end + end #class LogStash::Filters::Elasticsearch diff --git a/lib/logstash/filters/elasticsearch/dsl_executor.rb b/lib/logstash/filters/elasticsearch/dsl_executor.rb index eb92a12..a5c7b6a 100644 --- a/lib/logstash/filters/elasticsearch/dsl_executor.rb +++ b/lib/logstash/filters/elasticsearch/dsl_executor.rb @@ -8,13 +8,14 @@ def initialize(plugin, logger) @plugin = plugin @index = plugin.params["index"] @query = plugin.params["query"] + @query_dsl = plugin.query_dsl @fields = plugin.params["fields"] @result_size = plugin.params["result_size"] @docinfo_fields = plugin.params["docinfo_fields"] @tag_on_failure = plugin.params["tag_on_failure"] - @query_dsl = plugin.params["query_dsl"] @enable_sort = plugin.params["enable_sort"] @sort = plugin.params["sort"] + @aggregation_fields = plugin.params["aggregation_fields"] @logger = logger end diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 9bf364c..9d7ec8a 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -54,12 +54,7 @@ def process_response(event, response) def inform_warning(response) return unless (warning = response&.headers&.dig('warning')) - @logger.warn("ES|QL query execution warning: ", { message: warning }) - end - - def handle_errors(response) - return unless response&.headers&.dig("warning") - @logger.warn("ES|QL query execution warning: ", message: response.headers['warning']) + @logger.warn("ES|QL executor received warning", { message: warning }) end def add_requested_fields(event, response) diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb new file mode 100644 index 0000000..5d2c097 --- /dev/null +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -0,0 +1,103 @@ +# encoding: utf-8 +require 
"logstash/devutils/rspec/spec_helper" +require "logstash/filters/elasticsearch" + +describe LogStash::Filters::Elasticsearch::EsqlExecutor do + let(:client) { instance_double(LogStash::Filters::ElasticsearchClient) } + let(:logger) { double("logger") } + let(:plugin) { LogStash::Filters::Elasticsearch.new(plugin_config) } + let(:plugin_config) do + { + "query_mode" => "esql", + "query" => "FROM test-index | STATS count() BY field" + } + end + let(:esql_executor) { described_class.new(plugin, logger) } + + describe "when initializes" do + it "sets up the ESQL client with correct parameters" do + expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) + expect(esql_executor.instance_variable_get(:@esql_params)).to eq([]) + expect(esql_executor.instance_variable_get(:@fields)).to eq({}) + expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) + end + end + + describe "when processes" do + let(:plugin_config) { + super() + .merge( + { + "query" => "FROM my-index | WHERE field = ?foo", + "esql_params" => [{ "foo" => "%{bar}" }], + "fields" => { "val" => "val_new" } + }) + } + let(:event) { LogStash::Event.new({}) } + let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'id' }, { 'name' => 'val' }] } } + + before do + allow(event).to receive(:sprintf).and_return("resolved_value") + end + + it "resolves parameters" do + expect(event).to receive(:sprintf).with("%{bar}").and_return("resolved_value") + resolved_params = esql_executor.send(:resolve_parameters, event) + expect(resolved_params).to include("foo" => "resolved_value") + end + + it "executes the query with resolved parameters" do + allow(logger).to receive(:debug) + expect(client).to receive(:search).with( + { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json' }, + 'esql') + resolved_params = esql_executor.send(:resolve_parameters, event) + esql_executor.send(:execute_query, 
client, resolved_params) + end + + it "informs warning if received warning" do + allow(response).to receive(:headers).and_return({ "warning" => "some warning" }) + expect(logger).to receive(:warn).with("ES|QL executor received warning", { :message => "some warning" }) + esql_executor.send(:inform_warning, response) + end + + it "processes the response and adds metadata" do + expect(event).to receive(:set).with("[@metadata][total_hits]", 1) + expect(event).to receive(:set).with("val_new", "bar") + esql_executor.send(:process_response, event, response) + end + + it "executes chain of processes" do + allow(plugin).to receive(:decorate) + allow(logger).to receive(:debug) + allow(response).to receive(:headers).and_return({}) + expect(client).to receive(:search).with( + { + body: { query: plugin_config["query"], params: plugin_config["esql_params"] }, + format: 'json' + }, + 'esql' + ).and_return(response) + expect { esql_executor.process(client, LogStash::Event.new({ "hello" => "world" })) }.to_not raise_error + end + + it "tags on plugin failures" do + expect(event).to receive(:sprintf).with("%{bar}").and_raise("Event#sprintf error") + + expect(logger).to receive(:error).with("Failed to resolve parameter", {:error=>"Event#sprintf error", :key=>"foo", :value=>"%{bar}"}) + expect(logger).to receive(:error).with("Failed to process ES|QL filter", exception: instance_of(RuntimeError)) + expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") + esql_executor.process(client, event) + end + + it "tags on query execution failures" do + allow(logger).to receive(:debug) + allow(client).to receive(:search).and_raise("Query execution error") + + expect(logger).to receive(:error).with("Failed to process ES|QL filter", exception: instance_of(RuntimeError)) + expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") + esql_executor.process(client, event) + end + end + +end if LOGSTASH_VERSION >= LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION \ No 
newline at end of file From 265a2ed3b0c592738eeb3840ce87a7f9817b0a49 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 29 Apr 2025 00:18:07 -0700 Subject: [PATCH 03/21] Resolve query type with query shape and params validations. Drop null columns support, documentation is added, basic integration tests are added. --- docs/index.asciidoc | 65 ++++++++++- lib/logstash/filters/elasticsearch.rb | 49 ++++---- lib/logstash/filters/elasticsearch/client.rb | 4 + .../filters/elasticsearch/esql_executor.rb | 14 ++- spec/filters/elasticsearch_esql_spec.rb | 19 +-- .../integration/elasticsearch_esql_spec.rb | 109 ++++++++++++++++++ 6 files changed, 224 insertions(+), 36 deletions(-) create mode 100644 spec/filters/integration/elasticsearch_esql_spec.rb diff --git a/docs/index.asciidoc b/docs/index.asciidoc index e81b46a..55715b4 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -54,7 +54,7 @@ if [type] == "end" { The example below reproduces the above example but utilises the query_template. This query_template represents a full Elasticsearch query DSL and supports the -standard Logstash field substitution syntax. The example below issues +standard {ls} field substitution syntax. The example below issues the same query as the first example but uses the template shown. [source,ruby] @@ -118,6 +118,55 @@ Authentication to a secure Elasticsearch cluster is possible using _one_ of the Authorization to a secure Elasticsearch cluster requires `read` permission at index level and `monitoring` permissions at cluster level. The `monitoring` permission at cluster level is necessary to perform periodic connectivity checks. +[id="plugins-{type}s-{plugin}-esql"] +==== ES|QL support +{es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data. 
+ +To utilize the ES|QL feature with this plugin, the following version requirements must be met: +[cols="1,2",options="header"] +|=== +|Component |Minimum version +|{es} |8.11.0 or newer +|{ls} |8.17.4 or newer +|This plugin |4.23.0+ (4.x series) or 5.2.0+ (5.x series) +|=== + +To configure ES|QL query in the plugin, set your ES|QL query in the `query` parameter. + +IMPORTANT: We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. + +The following is a basic ES|QL query that sets food name to transaction event based on event's food ID: +[source, ruby] + filter { + elasticsearch { + hosts => [ 'https://..'] + api_key => '....' + query => ' + FROM food-index + | WHERE id = "?food_id" + ' + fields => { "name" => "food_name" } + query_params => { + named_params => ["food_id" => "%{[food_id]}"] + drop_null_columns => true + } + } + } + +Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query. + +NOTE: With ES|QL query, {ls} doesn't generate `event.original`. + +In the result event, the plugin sets total result size in `[@metadata][total_values]` field. It also limits the result size to 1 when `FROM` query is used. + +Consider the following caveat scenarios: + +- ES|QL by default returns entire columns even if their values are `null`. The plugin provides a `drop_null_columns` option via <>. Enabling this parameter instructs {es} to automatically exclude columns with null values from query results. +- If your {es} index uses https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] mapping(s), ES|QL query fetches all parent and sub-fields fields. Since {ls} events cannot contain parent field's concrete value and sub-field values together, we recommend using the `DROP` keyword in your ES|QL query explicitly remove sub-fields. 
+- If your {es} index contains top level `tags` field, this will conflict with {ls} event's reserved `tags` field. {ls} moves `tags` field values to the `_tags` and populates `tags` with `["_tagsparsefailure"]`. + +For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation]. + [id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Filter Configuration Options @@ -143,6 +192,7 @@ NOTE: As of version `4.0.0` of this plugin, a number of previously deprecated se | <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -344,6 +394,19 @@ Elasticsearch query string. More information is available in the string documentation]. Use either `query` or `query_template`. +[id="plugins-{type}s-{plugin}-query_params"] +===== `query_params` +Parameters to send to {es} together with <>. + +Accepted options: +[cols="2,1,3",options="header"] +|=== +|Option name |Default value | Description + +|`drop_null_columns` |`false` | Requests {es} to filter out `null` columns +|`named_params` |[] | List of named parameters and their matches used in the `query` +|=== + [id="plugins-{type}s-{plugin}-query_template"] ===== `query_template` diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index c797393..562fa05 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -27,9 +27,6 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # Field substitution (e.g. `index-name-%{date_field}`) is available config :index, :validate => :string, :default => "" - # Query mode to define what query style/syntax is used with @query param. - config :query_mode, :validate => %w[esql dsl], :default => "dsl" - # Elasticsearch query string. This can be in DSL or ES|QL query shape. # Read the Elasticsearch query string documentation. 
# DSL: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax @@ -145,8 +142,12 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # params to send to ES|QL query, naming params preferred # example, # if query is "FROM my-index | WHERE some_type = ?type" - # esql_params => [{"type": "@type_field"}] - config :esql_params, :validate => :array, :default => [] + # named params can be applied as following via query_params: + # query_params => { + # "named_params" => [ {"type" => "%{[type]}"}] + # "drop_null_columns" => true + # } + config :query_params, :validate => :hash, :default => {} config :ssl, :obsolete => "Set 'ssl_enabled' instead." config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." @@ -182,11 +183,11 @@ def self.validate_value(value, validator) attr_reader :query_dsl def register - case @query_mode + query_type = resolve_query_type + case query_type when "esql" validate_ls_version_for_esql_support! - validate_esql_query! - inform_ineffective_esql_params + validate_params_with_esql_query! @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger) else # dsl validate_dsl_query_settings! @@ -201,7 +202,7 @@ def register @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s test_connection! - validate_es_for_esql_support! + validate_es_for_esql_support! if query_type == "esql" setup_serverless if get_client.es_transport_client_type == "elasticsearch_transport" require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" @@ -435,6 +436,10 @@ def setup_ssl_params! params['ssl_enabled'] = @ssl_enabled ||= Array(@hosts).all? { |host| host && host.to_s.start_with?("https") } end +def resolve_query_type + @query&.strip&.match?(/\A(?:FROM|ROW|SHOW)/) ? "esql": "dsl" +end + def validate_dsl_query_settings! 
#Load query if it exists if @query_template @@ -464,23 +469,25 @@ def validate_ls_version_for_esql_support! end end - def validate_esql_query! - fail(LogStash::ConfigurationError, "`query` cannot be empty") if @query.strip.empty? - source_commands = %w[FROM ROW SHOW] - contains_source_command = source_commands.any? { |source_command| @query.strip.start_with?(source_command) } - fail(LogStash::ConfigurationError, "`query` needs to start with any of #{source_commands}") unless contains_source_command - end + def validate_params_with_esql_query! + invalid_params_with_esql = original_params.keys & %w(index query_template sort docinfo_fields aggregation_fields enable_sort result_size) + fail("Configured #{invalid_params_with_esql} params cannot be used with ES|QL query") if invalid_params_with_esql.any? + + accepted_query_params = %w(named_params drop_null_columns) + original_query_params = original_params["query_params"] ||= {} + invalid_query_params = original_query_params.keys - accepted_query_params + fail("#{accepted_query_params} options are accepted in `query_params`, but found #{invalid_query_params} invalid option(s)") if invalid_query_params.any? + + is_named_params_array = original_query_params["named_params"] ? original_query_params["named_params"].class.eql?(Array) : true + fail("`query_params => named_params` is required to be array") unless is_named_params_array - def inform_ineffective_esql_params - ineffective_options = original_params.keys & %w(index target size slices search_api, docinfo, docinfo_target, docinfo_fields) - @logger.info("Configured #{ineffective_options} params are ineffective in ES|QL mode") if ineffective_options.size > 1 + # TODO: validate that placeholders in query should match the named_params end def validate_es_for_esql_support! 
- return unless @query_mode == 'esql' # make sure connected ES supports ES|QL (8.11+) - es_supports_esql = Gem::Version.create(es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) - fail("Connected Elasticsearch #{es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql + es_supports_esql = Gem::Version.create(get_client.es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) + fail("Connected Elasticsearch #{get_client.es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql end end #class LogStash::Filters::Elasticsearch diff --git a/lib/logstash/filters/elasticsearch/client.rb b/lib/logstash/filters/elasticsearch/client.rb index b42d0ec..5265c8f 100644 --- a/lib/logstash/filters/elasticsearch/client.rb +++ b/lib/logstash/filters/elasticsearch/client.rb @@ -63,6 +63,10 @@ def info @client.info end + def es_version + @es_version ||= info&.dig('version', 'number') + end + def build_flavor @build_flavor ||= info&.dig('version', 'build_flavor') end diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 9d7ec8a..7fa7658 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -7,7 +7,10 @@ class EsqlExecutor def initialize(plugin, logger) @plugin = plugin - @esql_params = plugin.params["esql_params"] + + params = plugin.params["query_params"] || {} + @drop_null_columns = params["drop_null_columns"] || false + @named_params = params["named_params"] || [] @query = plugin.params["query"] @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] @@ -15,7 +18,7 @@ def initialize(plugin, logger) end def process(client, event) - resolved_params = @esql_params&.any? ? 
resolve_parameters(event) : [] + resolved_params = @named_params&.any? ? resolve_parameters(event) : [] response = execute_query(client, resolved_params) inform_warning(response) process_response(event, response) @@ -28,7 +31,7 @@ def process(client, event) private def resolve_parameters(event) - @esql_params.map do |entry| + @named_params.map do |entry| entry.each_with_object({}) do |(key, value), new_entry| begin new_entry[key] = event.sprintf(value) @@ -42,13 +45,14 @@ def resolve_parameters(event) def execute_query(client, params) @logger.debug("Executing ES|QL query", query: @query, params: params) - client.search({ body: { query: @query, params: params }, format: 'json' }, 'esql') + client.search({ body: { query: @query, params: params }, format: 'json', drop_null_columns: @drop_null_columns }, 'esql') end def process_response(event, response) return unless response['values'] && response['columns'] - event.set("[@metadata][total_hits]", response['values'].size) + # TODO: set to the target field once target support is added + event.set("[@metadata][total_values]", response['values'].size) add_requested_fields(event, response) end diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index 5d2c097..bd980b6 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -8,8 +8,7 @@ let(:plugin) { LogStash::Filters::Elasticsearch.new(plugin_config) } let(:plugin_config) do { - "query_mode" => "esql", - "query" => "FROM test-index | STATS count() BY field" + "query" => "FROM test-index | STATS count() BY field | LIMIT 10" } end let(:esql_executor) { described_class.new(plugin, logger) } @@ -17,7 +16,8 @@ describe "when initializes" do it "sets up the ESQL client with correct parameters" do expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) - expect(esql_executor.instance_variable_get(:@esql_params)).to eq([]) + 
expect(esql_executor.instance_variable_get(:@named_params)).to eq([]) + expect(esql_executor.instance_variable_get(:@drop_null_columns)).to eq(false) expect(esql_executor.instance_variable_get(:@fields)).to eq({}) expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) end @@ -28,8 +28,8 @@ super() .merge( { - "query" => "FROM my-index | WHERE field = ?foo", - "esql_params" => [{ "foo" => "%{bar}" }], + "query" => "FROM my-index | WHERE field = ?foo | LIMIT 5", + "query_params" => { "named_params" => [{ "foo" => "%{bar}" }] }, "fields" => { "val" => "val_new" } }) } @@ -49,7 +49,7 @@ it "executes the query with resolved parameters" do allow(logger).to receive(:debug) expect(client).to receive(:search).with( - { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json' }, + { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json', drop_null_columns: false, }, 'esql') resolved_params = esql_executor.send(:resolve_parameters, event) esql_executor.send(:execute_query, client, resolved_params) @@ -62,7 +62,7 @@ end it "processes the response and adds metadata" do - expect(event).to receive(:set).with("[@metadata][total_hits]", 1) + expect(event).to receive(:set).with("[@metadata][total_values]", 1) expect(event).to receive(:set).with("val_new", "bar") esql_executor.send(:process_response, event, response) end @@ -73,8 +73,9 @@ allow(response).to receive(:headers).and_return({}) expect(client).to receive(:search).with( { - body: { query: plugin_config["query"], params: plugin_config["esql_params"] }, - format: 'json' + body: { query: plugin_config["query"], params: plugin_config["query_params"]["named_params"] }, + format: 'json', + drop_null_columns: false, }, 'esql' ).and_return(response) diff --git a/spec/filters/integration/elasticsearch_esql_spec.rb b/spec/filters/integration/elasticsearch_esql_spec.rb new file mode 100644 index 
0000000..c88179f --- /dev/null +++ b/spec/filters/integration/elasticsearch_esql_spec.rb @@ -0,0 +1,109 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/filters/elasticsearch" +require "elasticsearch" +require_relative "../../../spec/es_helper" + +describe LogStash::Filters::Elasticsearch, integration: true do + + SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' + ES_HOSTS = ["http#{SECURE_INTEGRATION ? 's' : nil}://#{ESHelper.get_host_port}"] + + let(:plugin) { described_class.new(config) } + let(:es_index) { "es-filter-plugin-esql-integration-#{rand(1000)}" } + let(:test_documents) do + [ + { "message" => "test message 1", "type" => "a", "count" => 1 }, + { "message" => "test message 2", "type" => "a", "count" => 2 }, + { "message" => "test message 3", "type" => "b", "count" => 3 }, + { "message" => "test message 4", "type" => "b", "count" => 4 }, + { "message" => "test message 5", "type" => "c", "count" => 5 } + ] + end + let(:config) do + { + "hosts" => ES_HOSTS + } + end + let(:event) { LogStash::Event.new({}) } + let(:es_client) do + Elasticsearch::Client.new(hosts: ES_HOSTS) + end + + before(:all) do + is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client + + # Skip tests if ES version doesn't support ES||QL + # { 'user' => 'tests', 'password' => 'Tests123' } + es_client = SECURE_INTEGRATION ? + Elasticsearch::Client.new(hosts: ES_HOSTS, user: 'tests', password: 'Tests123') : + Elasticsearch::Client.new(hosts: ES_HOSTS) + es_version_info = es_client.info["version"] + es_gem_version = Gem::Version.create(es_version_info["number"]) + skip "ES version does not support ES|QL" if es_gem_version.nil? 
|| es_gem_version < Gem::Version.create(LogStash::Filters::Elasticsearch::ES_ESQL_SUPPORT_VERSION) + end + + before(:each) do + # Create index with test documents + es_client.indices.create(index: es_index, body: {}) unless es_client.indices.exists?(index: es_index) + + test_documents.each do |doc| + es_client.index(index: es_index, body: doc, refresh: true) + end + end + + after(:each) do + es_client.indices.delete(index: es_index) if es_client.indices.exists?(index: es_index) + end + + context "#run ES|QL queries" do + + before do + stub_const("LOGSTASH_VERSION", LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + end + + before(:each) do + plugin.register + end + + shared_examples "ESQL query execution" do |expected_count, fields| + it "processes the event" do + plugin.filter(event) + expect(event.get("[@metadata][total_values]")).to eq(expected_count) + fields&.each do | field | + expect(event.get(field)).to exist + end + end + end + + context "#basics" do + describe "with simple FROM query" do + let(:config) do + super().merge("query" => "FROM #{es_index}") + end + + include_examples "ESQL query execution", 5 + end + + describe "with simple FROM and WHERE query combinations" do + let(:config) do + super().merge("query" => "FROM #{es_index} | WHERE type==\"b\"") + end + + include_examples "ESQL query execution", 2 + end + + describe "when invalid query supplied" do + let(:config) do + super().merge("query" => "FROM undefined index | LIMIT 1") + end + + it "tags on failure" do + plugin.filter(event) + expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") + end + end + end + end +end \ No newline at end of file From 2a66f1c6165b32371aced62b9acbd475964d75e1 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 5 May 2025 19:45:27 -0700 Subject: [PATCH 04/21] Require drop null columns by default. Set limit to 1 by defaul if FROM query used. Add more debug, ward logs. 
--- docs/index.asciidoc | 19 ++-- lib/logstash/filters/elasticsearch.rb | 28 +++-- .../filters/elasticsearch/esql_executor.rb | 51 ++++++--- spec/filters/elasticsearch_esql_spec.rb | 37 ++++--- spec/filters/elasticsearch_spec.rb | 102 ++++++++++++++++++ .../integration/elasticsearch_esql_spec.rb | 78 +++++++++----- 6 files changed, 238 insertions(+), 77 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 55715b4..21ccc0e 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -135,7 +135,7 @@ To configure ES|QL query in the plugin, set your ES|QL query in the `query` para IMPORTANT: We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. -The following is a basic ES|QL query that sets food name to transaction event based on event's food ID: +The following is a basic ES|QL query that sets food name to transaction event based on upstream event's food ID: [source, ruby] filter { elasticsearch { @@ -145,27 +145,20 @@ The following is a basic ES|QL query that sets food name to transaction event ba FROM food-index | WHERE id = "?food_id" ' - fields => { "name" => "food_name" } query_params => { - named_params => ["food_id" => "%{[food_id]}"] - drop_null_columns => true + named_params => ["food_id" => "[food][id]"] } + fields => { "food.name" => "food_name" } } } Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query. -NOTE: With ES|QL query, {ls} doesn't generate `event.original`. +In the result event, the plugin sets total result size in `[@metadata][total_hits]` field. It also limits the result size to 1 when `FROM` query is used. -In the result event, the plugin sets total result size in `[@metadata][total_values]` field. It also limits the result size to 1 when `FROM` query is used. 
+NOTE: If a `FROM` execution command is used and no `LIMIT` is set, the plugin attaches `| LIMIT 1`. -Consider the following caveat scenarios: - -- ES|QL by default returns entire columns even if their values are `null`. The plugin provides a `drop_null_columns` option via <>. Enabling this parameter instructs {es} to automatically exclude columns with null values from query results. -- If your {es} index uses https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] mapping(s), ES|QL query fetches all parent and sub-fields fields. Since {ls} events cannot contain parent field's concrete value and sub-field values together, we recommend using the `DROP` keyword in your ES|QL query explicitly remove sub-fields. -- If your {es} index contains top level `tags` field, this will conflict with {ls} event's reserved `tags` field. {ls} moves `tags` field values to the `_tags` and populates `tags` with `["_tagsparsefailure"]`. - -For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[official {es} documentation]. +For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{es} ES|QL documentation]. 
[id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Filter Configuration Options diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index 562fa05..dc73c90 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -145,7 +145,6 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # named params can be applied as following via query_params: # query_params => { # "named_params" => [ {"type" => "%{[type]}"}] - # "drop_null_columns" => true # } config :query_params, :validate => :hash, :default => {} @@ -186,8 +185,11 @@ def register query_type = resolve_query_type case query_type when "esql" + invalid_params_with_esql = original_params.keys & %w(index query_template sort docinfo_fields aggregation_fields enable_sort result_size) + raise LogStash::ConfigurationError, "Configured #{invalid_params_with_esql} params cannot be used with ES|QL query" if invalid_params_with_esql.any? + validate_ls_version_for_esql_support! - validate_params_with_esql_query! + validate_esql_query_and_params! @esql_executor ||= LogStash::Filters::Elasticsearch::EsqlExecutor.new(self, @logger) else # dsl validate_dsl_query_settings! @@ -469,19 +471,25 @@ def validate_ls_version_for_esql_support! end end - def validate_params_with_esql_query! - invalid_params_with_esql = original_params.keys & %w(index query_template sort docinfo_fields aggregation_fields enable_sort result_size) - fail("Configured #{invalid_params_with_esql} params cannot be used with ES|QL query") if invalid_params_with_esql.any? - - accepted_query_params = %w(named_params drop_null_columns) + def validate_esql_query_and_params! 
+ accepted_query_params = %w(named_params) original_query_params = original_params["query_params"] ||= {} invalid_query_params = original_query_params.keys - accepted_query_params - fail("#{accepted_query_params} options are accepted in `query_params`, but found #{invalid_query_params} invalid option(s)") if invalid_query_params.any? + raise LogStash::ConfigurationError, "#{accepted_query_params} option(s) accepted in `query_params`, but found #{invalid_query_params} invalid option(s)" if invalid_query_params.any? is_named_params_array = original_query_params["named_params"] ? original_query_params["named_params"].class.eql?(Array) : true - fail("`query_params => named_params` is required to be array") unless is_named_params_array + raise LogStash::ConfigurationError, "`query_params => named_params` is required to be array" unless is_named_params_array + + named_params = original_query_params["named_params"] ||= [] + named_params_keys = named_params.map(&:keys).flatten - # TODO: validate that placeholders in query should match the named_params + placeholders = @query.scan(/\?(\w+)/).flatten + raise LogStash::ConfigurationError, "Number of placeholders in `query` and `named_params` do not match" unless placeholders.size == named_params_keys.size + + placeholders.each do |placeholder| + placeholder.delete_prefix!("?") + raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless named_params_keys.include?(placeholder) + end end def validate_es_for_esql_support! 
diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 7fa7658..1b40740 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -7,14 +7,19 @@ class EsqlExecutor def initialize(plugin, logger) @plugin = plugin + @logger = logger - params = plugin.params["query_params"] || {} - @drop_null_columns = params["drop_null_columns"] || false - @named_params = params["named_params"] || [] @query = plugin.params["query"] + if @query.strip.start_with?("FROM") && !@query.match?(/\|\s*LIMIT/) + @logger.warn("ES|QL query doesn't contain LIMIT, adding `| LIMIT 1` to optimize the performance") + @query.concat(' | LIMIT 1') + end + + query_params = plugin.params["query_params"] || {} + @named_params = query_params["named_params"] || [] @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] - @logger = logger + @logger.debug("ES|QL query executor initialized with ", query: @query, named_params: @named_params) end def process(client, event) @@ -34,8 +39,11 @@ def resolve_parameters(event) @named_params.map do |entry| entry.each_with_object({}) do |(key, value), new_entry| begin - new_entry[key] = event.sprintf(value) + resolved_value = event.get(value) + @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") + new_entry[key] = resolved_value rescue => e + # catches invalid field reference @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) raise end @@ -44,16 +52,30 @@ def resolve_parameters(event) end def execute_query(client, params) + # debug logs may help to check what query shape the plugin is sending to ES @logger.debug("Executing ES|QL query", query: @query, params: params) - client.search({ body: { query: @query, params: params }, format: 'json', drop_null_columns: @drop_null_columns }, 'esql') + client.search({ body: { query: 
@query, params: params }, format: 'json', drop_null_columns: true }, 'esql') end def process_response(event, response) - return unless response['values'] && response['columns'] + columns = response['columns'].freeze + values = response['values'].freeze + if values.nil? || values.size == 0 + @logger.debug("Empty ES|QL query result", columns: columns, values: values) + return + end + + # this shouldn't never happen but just in case not crash the plugin + if columns.nil? || columns.size == 0 + @logger.error("No columns exist but received values", columns: columns, values: values) + return + end - # TODO: set to the target field once target support is added - event.set("[@metadata][total_values]", response['values'].size) - add_requested_fields(event, response) + # TODO: do we need to set `total_hits` to target? + # if not, how do we resolve conflict with existing es-input total_hits field? + # FYI: with DSL it stores in `[@metadata][total_hits]` + event.set("[@metadata][total_hits]", values.size) + add_requested_fields(event, columns, values) end def inform_warning(response) @@ -61,13 +83,14 @@ def inform_warning(response) @logger.warn("ES|QL executor received warning", { message: warning }) end - def add_requested_fields(event, response) + def add_requested_fields(event, columns, values) @fields.each do |old_key, new_key| - column_index = response['columns'].find_index { |col| col['name'] == old_key } + column_index = columns.find_index { |col| col['name'] == old_key } next unless column_index - values = response['values'].map { |entry| entry[column_index] } - event.set(new_key, values.one? ? values.first : values) if values&.size > 0 + row_values = values.map { |entry| entry[column_index] }&.compact # remove non-exist field values with compact + # TODO: set to the target field once target support is added + event.set(new_key, row_values.one? ? 
row_values.first : row_values) if row_values&.size > 0 end end end diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index bd980b6..0da3e51 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -13,43 +13,44 @@ end let(:esql_executor) { described_class.new(plugin, logger) } - describe "when initializes" do + context "when initializes" do it "sets up the ESQL client with correct parameters" do + allow(logger).to receive(:debug) expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) expect(esql_executor.instance_variable_get(:@named_params)).to eq([]) - expect(esql_executor.instance_variable_get(:@drop_null_columns)).to eq(false) expect(esql_executor.instance_variable_get(:@fields)).to eq({}) expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) end end - describe "when processes" do + context "when processes" do let(:plugin_config) { super() .merge( { "query" => "FROM my-index | WHERE field = ?foo | LIMIT 5", - "query_params" => { "named_params" => [{ "foo" => "%{bar}" }] }, - "fields" => { "val" => "val_new" } + "query_params" => { "named_params" => [{ "foo" => "[bar]" }] }, + "fields" => { "val" => "val_new", "odd" => "new_odd" } }) } let(:event) { LogStash::Event.new({}) } - let(:response) { { 'values' => [%w[foo bar]], 'columns' => [{ 'name' => 'id' }, { 'name' => 'val' }] } } + let(:response) { { 'values' => [["foo", "bar", nil]], 'columns' => [{ 'name' => 'id' }, { 'name' => 'val' }, { 'name' => 'odd' }] } } before do - allow(event).to receive(:sprintf).and_return("resolved_value") + allow(logger).to receive(:debug) end it "resolves parameters" do - expect(event).to receive(:sprintf).with("%{bar}").and_return("resolved_value") + expect(event).to receive(:get).with("[bar]").and_return("resolved_value") resolved_params = esql_executor.send(:resolve_parameters, event) expect(resolved_params).to 
include("foo" => "resolved_value") end it "executes the query with resolved parameters" do allow(logger).to receive(:debug) + expect(event).to receive(:get).with("[bar]").and_return("resolved_value") expect(client).to receive(:search).with( - { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json', drop_null_columns: false, }, + { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json', drop_null_columns: true, }, 'esql') resolved_params = esql_executor.send(:resolve_parameters, event) esql_executor.send(:execute_query, client, resolved_params) @@ -62,7 +63,7 @@ end it "processes the response and adds metadata" do - expect(event).to receive(:set).with("[@metadata][total_values]", 1) + expect(event).to receive(:set).with("[@metadata][total_hits]", 1) expect(event).to receive(:set).with("val_new", "bar") esql_executor.send(:process_response, event, response) end @@ -73,19 +74,25 @@ allow(response).to receive(:headers).and_return({}) expect(client).to receive(:search).with( { - body: { query: plugin_config["query"], params: plugin_config["query_params"]["named_params"] }, + body: { query: plugin_config["query"], params: [{"foo"=>"resolve_me"}] }, format: 'json', - drop_null_columns: false, + drop_null_columns: true, }, 'esql' ).and_return(response) - expect { esql_executor.process(client, LogStash::Event.new({ "hello" => "world" })) }.to_not raise_error + + event = LogStash::Event.new({ "hello" => "world", "bar" => "resolve_me" }) + expect { esql_executor.process(client, event) }.to_not raise_error + expect(event.get("[@metadata][total_hits]")).to eq(1) + expect(event.get("hello")).to eq("world") + expect(event.get("val_new")).to eq("bar") + expect(event.get("new_odd")).to be_nil # filters out non-exist fields end it "tags on plugin failures" do - expect(event).to receive(:sprintf).with("%{bar}").and_raise("Event#sprintf error") + expect(event).to 
receive(:get).with("[bar]").and_raise("Event#get Invalid FieldReference error") - expect(logger).to receive(:error).with("Failed to resolve parameter", {:error=>"Event#sprintf error", :key=>"foo", :value=>"%{bar}"}) + expect(logger).to receive(:error).with("Failed to resolve parameter", {:error=>"Event#get Invalid FieldReference error", :key=>"foo", :value=>"[bar]"}) expect(logger).to receive(:error).with("Failed to process ES|QL filter", exception: instance_of(RuntimeError)) expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") esql_executor.process(client, event) diff --git a/spec/filters/elasticsearch_spec.rb b/spec/filters/elasticsearch_spec.rb index 3e1f6f2..ad1be19 100644 --- a/spec/filters/elasticsearch_spec.rb +++ b/spec/filters/elasticsearch_spec.rb @@ -891,6 +891,108 @@ def wait_receive_request end end + describe "ES|QL" do + + describe "compatibility" do + let(:config) {{ "hosts" => ["localhost:9200"], "query" => "FROM my-index" }} + + context "when LS doesn't support ES|QL" do + let(:ls_version) { LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION } + before(:each) do + stub_const("LOGSTASH_VERSION", "8.17.0") + end + + it "raises a runtime error" do + expect { plugin.send(:validate_ls_version_for_esql_support!) } + .to raise_error(RuntimeError, /Current version of Logstash does not include Elasticsearch client which supports ES|QL. Please upgrade Logstash to at least #{ls_version}/) + end + end + + context "when ES doesn't support ES|QL" do + let(:es_version) { LogStash::Filters::Elasticsearch::ES_ESQL_SUPPORT_VERSION } + let(:client) { double(:client) } + + it "raises a runtime error" do + allow(plugin).to receive(:get_client).twice.and_return(client) + allow(client).to receive(:es_version).and_return("8.8.0") + + expect { plugin.send(:validate_es_for_esql_support!) } + .to raise_error(RuntimeError, /Connected Elasticsearch 8.8.0 version does not supports ES|QL. 
ES|QL feature requires at least Elasticsearch #{es_version} version./) + end + end + end + + context "when non-ES|QL params applied" do + let(:config) do + { + "hosts" => ["localhost:9200"], + "query" => "FROM my-index", + "index" => "some-index", + "docinfo_fields" => { "_index" => "es_index" }, + "sort" => "@timestamp:desc", + "enable_sort" => true, + "aggregation_fields" => { "bytes_avg" => "bytes_avg_ls_field" } + } + end + it "raises a config error" do + invalid_params_with_esql = %w(index docinfo_fields sort enable_sort aggregation_fields) + error_text = /Configured #{invalid_params_with_esql} params cannot be used with ES|QL query/i + expect { plugin.register }.to raise_error LogStash::ConfigurationError, error_text + end + end + + context "when `named_params` isn't array" do + let(:config) do + { + "hosts" => ["localhost:9200"], + "query" => "FROM my-index", + "query_params" => { "named_params" => {"a" => "b"} }, + } + end + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /`query_params => named_params` is required to be array/ + end + end + + context "when `named_params` exists but not placeholder in the query" do + let(:config) do + { + "hosts" => ["localhost:9200"], + "query" => "FROM my-index", + "query_params" => { "named_params" => [{"a" => "b"}] }, + } + end + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Number of placeholders in `query` and `named_params` do not match/ + end + end + + context "when `named_params` doesn't exist but placeholder found" do + let(:config) do + { + "hosts" => ["localhost:9200"], + "query" => "FROM my-index | WHERE a = ?a" + } + end + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Number of placeholders in `query` and `named_params` do not match/ + end + end + + context "when placeholder and `named_params` do not match" do + let(:config) do + { + 
"hosts" => ["localhost:9200"], + "query" => "FROM my-index | WHERE type = ?type", + "query_params" => { "named_params" => [{"b" => "c"}] }, + } + end + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Placeholder type not found in query/ + end + end + end + def extract_transport(client) # on 7x: client.transport.transport # on >=8.x: client.transport diff --git a/spec/filters/integration/elasticsearch_esql_spec.rb b/spec/filters/integration/elasticsearch_esql_spec.rb index c88179f..0960b64 100644 --- a/spec/filters/integration/elasticsearch_esql_spec.rb +++ b/spec/filters/integration/elasticsearch_esql_spec.rb @@ -17,7 +17,8 @@ { "message" => "test message 2", "type" => "a", "count" => 2 }, { "message" => "test message 3", "type" => "b", "count" => 3 }, { "message" => "test message 4", "type" => "b", "count" => 4 }, - { "message" => "test message 5", "type" => "c", "count" => 5 } + { "message" => "test message 5", "type" => "c", "count" => 5 }, + { "message" => "odd test message", "type" => "t" } ] end let(:config) do @@ -35,7 +36,6 @@ skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client # Skip tests if ES version doesn't support ES||QL - # { 'user' => 'tests', 'password' => 'Tests123' } es_client = SECURE_INTEGRATION ? 
Elasticsearch::Client.new(hosts: ES_HOSTS, user: 'tests', password: 'Tests123') : Elasticsearch::Client.new(hosts: ES_HOSTS) @@ -57,7 +57,7 @@ es_client.indices.delete(index: es_index) if es_client.indices.exists?(index: es_index) end - context "#run ES|QL queries" do + describe "run ES|QL queries" do before do stub_const("LOGSTASH_VERSION", LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION) @@ -70,39 +70,67 @@ shared_examples "ESQL query execution" do |expected_count, fields| it "processes the event" do plugin.filter(event) - expect(event.get("[@metadata][total_values]")).to eq(expected_count) - fields&.each do | field | - expect(event.get(field)).to exist + expect(event.get("[@metadata][total_hits]")).to eq(expected_count) + fields&.each do | old_key, new_key | + expect(event.get(new_key)).not_to be(nil) end end end - context "#basics" do - describe "with simple FROM query" do - let(:config) do - super().merge("query" => "FROM #{es_index}") - end + describe "LIMIT 1 by default" do + let(:config) do + super().merge("query" => "FROM #{es_index}") + end + + include_examples "ESQL query execution", 1 + end - include_examples "ESQL query execution", 5 + describe "with simple FROM query with LIMIT" do + let(:config) do + super().merge("query" => "FROM #{es_index} | LIMIT 99") end - describe "with simple FROM and WHERE query combinations" do - let(:config) do - super().merge("query" => "FROM #{es_index} | WHERE type==\"b\"") - end + include_examples "ESQL query execution", 6 + end - include_examples "ESQL query execution", 2 + describe "with simple FROM and WHERE query combinations" do + let(:config) do + super().merge("query" => "FROM #{es_index} | WHERE type==\"b\" | LIMIT 99") end - describe "when invalid query supplied" do - let(:config) do - super().merge("query" => "FROM undefined index | LIMIT 1") - end + include_examples "ESQL query execution", 2 + end - it "tags on failure" do - plugin.filter(event) - expect(event.to_hash["tags"]).to 
include("_elasticsearch_lookup_failure") - end + describe "when invalid query used" do + let(:config) do + super().merge("query" => "FROM undefined index | LIMIT 1") + end + + it "tags on failure" do + plugin.filter(event) + expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") + end + end + + describe "when field enrichment requested" do + fields = {"message" => "target_message", "count" => "target_count"} + let(:config) do + super().merge("query" => "FROM #{es_index} | WHERE type==\"b\" | LIMIT 99", "fields" => fields) + end + + include_examples "ESQL query execution", 2, fields + end + + describe "when non-exist field value appear" do + let(:config) do + super().merge("query" => "FROM #{es_index} | LIMIT 99", "fields" => {"message" => "target_message", "count" => "target_count"}) + end + + it "processes the event" do + plugin.filter(event) + expect(event.get("[@metadata][total_hits]")).to eq(6) + expect(event.get("target_message").size).to eq(6) + expect(event.get("target_count").size).to eq(5) end end end From 712c9c4da8340e2d81c5c34a70af48e15364a6c4 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 5 May 2025 19:47:41 -0700 Subject: [PATCH 05/21] Update ES|QL supported plugin version in the docs. --- docs/index.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 21ccc0e..94c7dc0 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -128,7 +128,7 @@ To utilize the ES|QL feature with this plugin, the following version requirement |Component |Minimum version |{es} |8.11.0 or newer |{ls} |8.17.4 or newer -|This plugin |4.23.0+ (4.x series) or 5.2.0+ (5.x series) +|This plugin |3.19.0+ (3.x series) or 4.3.0+ (4.x series) |=== To configure ES|QL query in the plugin, set your ES|QL query in the `query` parameter. 
From f7a8c619f56b07d2d3a8279352ed71cca636acde Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 5 May 2025 19:50:02 -0700 Subject: [PATCH 06/21] Remove drop_null_columns param from doc reference. --- docs/index.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 94c7dc0..b67b073 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -396,7 +396,6 @@ Accepted options: |=== |Option name |Default value | Description -|`drop_null_columns` |`false` | Requests {es} to filter out `null` columns |`named_params` |[] | List of named parameters and their matches used in the `query` |=== From 66e699d9d4c2cb75e0c29121cb6ab5a664150064 Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Tue, 6 May 2025 23:55:42 -0700 Subject: [PATCH 07/21] Apply suggestions from code review Accept obvious or agreed code review suggestions. Co-authored-by: Rye Biesemeyer --- docs/index.asciidoc | 13 +++---------- lib/logstash/filters/elasticsearch.rb | 3 +-- lib/logstash/filters/elasticsearch/esql_executor.rb | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index b67b073..b235cc1 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -119,17 +119,10 @@ Authorization to a secure Elasticsearch cluster requires `read` permission at in The `monitoring` permission at cluster level is necessary to perform periodic connectivity checks. [id="plugins-{type}s-{plugin}-esql"] -==== ES|QL support -{es} Query Language (ES|QL) provides a SQL-like interface for querying your {es} data. +==== {esql} support +{es} Query Language ({esql}) provides a SQL-like interface for querying your {es} data. 
-To utilize the ES|QL feature with this plugin, the following version requirements must be met: -[cols="1,2",options="header"] -|=== -|Component |Minimum version -|{es} |8.11.0 or newer -|{ls} |8.17.4 or newer -|This plugin |3.19.0+ (3.x series) or 4.3.0+ (4.x series) -|=== +To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer. To configure ES|QL query in the plugin, set your ES|QL query in the `query` parameter. diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index dc73c90..0006601 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -483,11 +483,10 @@ def validate_esql_query_and_params! named_params = original_query_params["named_params"] ||= [] named_params_keys = named_params.map(&:keys).flatten - placeholders = @query.scan(/\?(\w+)/).flatten + placeholders = @query.scan(/(?<=[?])[a-z_][a-z0-9_]*/i) raise LogStash::ConfigurationError, "Number of placeholders in `query` and `named_params` do not match" unless placeholders.size == named_params_keys.size placeholders.each do |placeholder| - placeholder.delete_prefix!("?") raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless named_params_keys.include?(placeholder) end end diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 1b40740..8b4be6e 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -88,7 +88,7 @@ def add_requested_fields(event, columns, values) column_index = columns.find_index { |col| col['name'] == old_key } next unless column_index - row_values = values.map { |entry| entry[column_index] }&.compact # remove non-exist field values with compact + row_values = values[column_index]&.compact # remove non-exist field values with compact # TODO: set to the target field once 
target support is added event.set(new_key, row_values.one? ? row_values.first : row_values) if row_values&.size > 0 end From a42ccd6d6d6a8bcf85902ee5edef11e38674b460 Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Thu, 8 May 2025 15:56:04 -0700 Subject: [PATCH 08/21] Apply suggestions from code review Separate DSL and ESQL interface in the client. Co-authored-by: Rye Biesemeyer --- lib/logstash/filters/elasticsearch/client.rb | 8 ++++++-- lib/logstash/filters/elasticsearch/esql_executor.rb | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/elasticsearch/client.rb b/lib/logstash/filters/elasticsearch/client.rb index 5265c8f..0f8f284 100644 --- a/lib/logstash/filters/elasticsearch/client.rb +++ b/lib/logstash/filters/elasticsearch/client.rb @@ -55,8 +55,12 @@ def initialize(logger, hosts, options = {}) @client = ::Elasticsearch::Client.new(client_options) end - def search(params={}, query_type = 'dsl') - query_type == 'esql' ? 
@client.esql.query(params) : @client.search(params) + def search(params={}) + @client.search(params) + end + + def esql_query(params={}) + @client.esql.query(params) end def info diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 8b4be6e..f680e2d 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -54,7 +54,7 @@ def resolve_parameters(event) def execute_query(client, params) # debug logs may help to check what query shape the plugin is sending to ES @logger.debug("Executing ES|QL query", query: @query, params: params) - client.search({ body: { query: @query, params: params }, format: 'json', drop_null_columns: true }, 'esql') + client.esql_query({ body: { query: @query, params: params }, format: 'json', drop_null_columns: true }, 'esql') end def process_response(event, response) From bf43526fd7b3e10bfeff5a51edc94924395f5e1e Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 8 May 2025 22:49:31 -0700 Subject: [PATCH 09/21] Rebase against upstream main after target support added. Separate unit test for DSL. Address comments: do not save ES version in client, add apply target method in executors, set to target if target is defined, docs update. 
Co-authored-by: Rye Biesemeyer --- docs/index.asciidoc | 10 +- lib/logstash/filters/elasticsearch.rb | 25 +- lib/logstash/filters/elasticsearch/client.rb | 2 +- .../filters/elasticsearch/dsl_executor.rb | 32 +- .../filters/elasticsearch/esql_executor.rb | 30 +- spec/filters/elasticsearch_dsl_spec.rb | 372 ++++++++++++++++++ spec/filters/elasticsearch_esql_spec.rb | 15 +- spec/filters/elasticsearch_spec.rb | 356 +---------------- 8 files changed, 444 insertions(+), 398 deletions(-) create mode 100644 spec/filters/elasticsearch_dsl_spec.rb diff --git a/docs/index.asciidoc b/docs/index.asciidoc index b235cc1..884b95a 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -375,10 +375,11 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`. * Value type is <> * There is no default value for this setting. -Elasticsearch query string. More information is available in the -{ref}/query-dsl-query-string-query.html#query-string-syntax[Elasticsearch query -string documentation]. -Use either `query` or `query_template`. +The query to be executed. +Accepted query shape is DSL query string or ES|QL. +For the DSL query string, use either `query` or `query_template`. +Read the {ref}/query-dsl-query-string-query.html[{es} query +string documentation] or {ref}/esql.html[{es} ES|QL documentation] for more information. 
[id="plugins-{type}s-{plugin}-query_params"] ===== `query_params` @@ -392,7 +393,6 @@ Accepted options: |`named_params` |[] | List of named parameters and their matches used in the `query` |=== - [id="plugins-{type}s-{plugin}-query_template"] ===== `query_template` diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index 0006601..177c424 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -235,18 +235,6 @@ def prepare_user_agent private - # if @target is defined, creates a nested structure to inject result into target field - # if not defined, directly sets to the top-level event field - # @param event [LogStash::Event] - # @param new_key [String] name of the field to set - # @param value_to_set [Array] values to set - # @return [void] - def set_to_event_target(event, new_key, value_to_set) - key_to_set = target ? "[#{target}][#{new_key}]" : new_key - - event.set(key_to_set, value_to_set) - end - def client_options @client_options ||= { :user => @user, @@ -438,9 +426,9 @@ def setup_ssl_params! params['ssl_enabled'] = @ssl_enabled ||= Array(@hosts).all? { |host| host && host.to_s.start_with?("https") } end -def resolve_query_type - @query&.strip&.match?(/\A(?:FROM|ROW|SHOW)/) ? "esql": "dsl" -end + def resolve_query_type + @query&.strip&.match?(/\A(?:FROM|ROW|SHOW)/) ? "esql": "dsl" + end def validate_dsl_query_settings! #Load query if it exists @@ -484,8 +472,6 @@ def validate_esql_query_and_params! 
named_params_keys = named_params.map(&:keys).flatten placeholders = @query.scan(/(?<=[?])[a-z_][a-z0-9_]*/i) - raise LogStash::ConfigurationError, "Number of placeholders in `query` and `named_params` do not match" unless placeholders.size == named_params_keys.size - placeholders.each do |placeholder| raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless named_params_keys.include?(placeholder) end @@ -493,8 +479,9 @@ def validate_esql_query_and_params! def validate_es_for_esql_support! # make sure connected ES supports ES|QL (8.11+) - es_supports_esql = Gem::Version.create(get_client.es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) - fail("Connected Elasticsearch #{get_client.es_version} version does not supports ES|QL. ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql + @es_version ||= get_client.es_version + es_supports_esql = Gem::Version.create(@es_version) >= Gem::Version.create(ES_ESQL_SUPPORT_VERSION) + fail("Connected Elasticsearch #{@es_version} version does not supports ES|QL. 
ES|QL feature requires at least Elasticsearch #{ES_ESQL_SUPPORT_VERSION} version.") unless es_supports_esql end end #class LogStash::Filters::Elasticsearch diff --git a/lib/logstash/filters/elasticsearch/client.rb b/lib/logstash/filters/elasticsearch/client.rb index 0f8f284..3174550 100644 --- a/lib/logstash/filters/elasticsearch/client.rb +++ b/lib/logstash/filters/elasticsearch/client.rb @@ -68,7 +68,7 @@ def info end def es_version - @es_version ||= info&.dig('version', 'number') + info&.dig('version', 'number') end def build_flavor diff --git a/lib/logstash/filters/elasticsearch/dsl_executor.rb b/lib/logstash/filters/elasticsearch/dsl_executor.rb index a5c7b6a..bf92050 100644 --- a/lib/logstash/filters/elasticsearch/dsl_executor.rb +++ b/lib/logstash/filters/elasticsearch/dsl_executor.rb @@ -5,7 +5,6 @@ module Filters class Elasticsearch class DslExecutor def initialize(plugin, logger) - @plugin = plugin @index = plugin.params["index"] @query = plugin.params["query"] @query_dsl = plugin.query_dsl @@ -17,6 +16,13 @@ def initialize(plugin, logger) @sort = plugin.params["sort"] @aggregation_fields = plugin.params["aggregation_fields"] @logger = logger + @event_decorator = plugin.method(:decorate) + @target_field = plugin.params["target"] + if @target_field + def self.apply_target(path); "[#{@target_field}][#{path}]"; end + else + def self.apply_target(path); path; end + end end def process(client, event) @@ -46,17 +52,19 @@ def process(client, event) matched = true @fields.each do |old_key, new_key| old_key_path = extract_path(old_key) - set = result_hits.map do |doc| + extracted_hit_values = result_hits.map do |doc| extract_value(doc["_source"], old_key_path) end - event.set(new_key, set.count > 1 ? set : set.first) + value_to_set = extracted_hit_values.count > 1 ? 
extracted_hit_values : extracted_hit_values.first + set_to_event_target(event, new_key, value_to_set) end @docinfo_fields.each do |old_key, new_key| old_key_path = extract_path(old_key) - set = result_hits.map do |doc| + extracted_docs_info = result_hits.map do |doc| extract_value(doc, old_key_path) end - event.set(new_key, set.count > 1 ? set : set.first) + value_to_set = extracted_docs_info.count > 1 ? extracted_docs_info : extracted_docs_info.first + set_to_event_target(event, new_key, value_to_set) end end @@ -64,7 +72,7 @@ def process(client, event) if !result_aggregations.nil? && !result_aggregations.empty? matched = true @aggregation_fields.each do |agg_name, ls_field| - event.set(ls_field, result_aggregations[agg_name]) + set_to_event_target(event, ls_field, result_aggregations[agg_name]) end end @@ -78,7 +86,7 @@ def process(client, event) end @tag_on_failure.each { |tag| event.tag(tag) } else - @plugin.decorate(event) if matched + @event_decorator.call(event) if matched end end @@ -116,6 +124,16 @@ def extract_value(source, path) end end + # if @target is defined, creates a nested structure to inject result into target field + # if not defined, directly sets to the top-level event field + # @param event [LogStash::Event] + # @param new_key [String] name of the field to set + # @param value_to_set [Array] values to set + # @return [void] + def set_to_event_target(event, new_key, value_to_set) + key_to_set = self.apply_target(new_key) + event.set(key_to_set, value_to_set) + end end end end diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index f680e2d..ef8fce3 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -6,9 +6,9 @@ class Elasticsearch class EsqlExecutor def initialize(plugin, logger) - @plugin = plugin @logger = logger + @event_decorator = plugin.method(:decorate) @query = plugin.params["query"] if 
@query.strip.start_with?("FROM") && !@query.match?(/\|\s*LIMIT/) @logger.warn("ES|QL query doesn't contain LIMIT, adding `| LIMIT 1` to optimize the performance") @@ -20,6 +20,13 @@ def initialize(plugin, logger) @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] @logger.debug("ES|QL query executor initialized with ", query: @query, named_params: @named_params) + + @target_field = plugin.params["target"] + if @target_field + def self.apply_target(path); "[#{@target_field}][#{path}]"; end + else + def self.apply_target(path); path; end + end end def process(client, event) @@ -27,7 +34,7 @@ def process(client, event) response = execute_query(client, resolved_params) inform_warning(response) process_response(event, response) - @plugin.decorate(event) + @event_decorator.call(event) rescue => e @logger.error("Failed to process ES|QL filter", exception: e) @tag_on_failure.each { |tag| event.tag(tag) } @@ -54,7 +61,7 @@ def resolve_parameters(event) def execute_query(client, params) # debug logs may help to check what query shape the plugin is sending to ES @logger.debug("Executing ES|QL query", query: @query, params: params) - client.esql_query({ body: { query: @query, params: params }, format: 'json', drop_null_columns: true }, 'esql') + client.esql_query({ body: { query: @query, params: params }, format: 'json', drop_null_columns: true }) end def process_response(event, response) @@ -88,11 +95,22 @@ def add_requested_fields(event, columns, values) column_index = columns.find_index { |col| col['name'] == old_key } next unless column_index - row_values = values[column_index]&.compact # remove non-exist field values with compact - # TODO: set to the target field once target support is added - event.set(new_key, row_values.one? ? row_values.first : row_values) if row_values&.size > 0 + row_values = values.map { |entry| entry[column_index] }&.compact + value_to_set = row_values.count > 1 ? 
row_values : row_values.first + set_to_event_target(event, new_key, value_to_set) unless value_to_set.nil? end end + + # if @target is defined, creates a nested structure to inject result into target field + # if not defined, directly sets to the top-level event field + # @param event [LogStash::Event] + # @param new_key [String] name of the field to set + # @param value_to_set [Array] values to set + # @return [void] + def set_to_event_target(event, new_key, value_to_set) + key_to_set = self.apply_target(new_key) + event.set(key_to_set, value_to_set) + end end end end diff --git a/spec/filters/elasticsearch_dsl_spec.rb b/spec/filters/elasticsearch_dsl_spec.rb new file mode 100644 index 0000000..2885caa --- /dev/null +++ b/spec/filters/elasticsearch_dsl_spec.rb @@ -0,0 +1,372 @@ +# encoding: utf-8 +require "logstash/devutils/rspec/spec_helper" +require "logstash/filters/elasticsearch" + +describe LogStash::Filters::Elasticsearch::DslExecutor do + let(:client) { instance_double(LogStash::Filters::ElasticsearchClient) } + let(:logger) { double("logger") } + let(:plugin) { LogStash::Filters::Elasticsearch.new(plugin_config) } + let(:plugin_config) do + { + "index" => "test_index", + "query" => "test_query", + "fields" => { "field1" => "field1_mapped" }, + "result_size" => 10, + "docinfo_fields" => { "_id" => "doc_id" }, + "tag_on_failure" => ["_failure"], + "enable_sort" => true, + "sort" => "@timestamp:desc", + "aggregation_fields" => { "agg1" => "agg1_mapped" } + } + end + let(:dsl_executor) { described_class.new(plugin, logger) } + let(:event) { LogStash::Event.new({}) } + + describe "#initialize" do + it "initializes instance variables correctly" do + expect(dsl_executor.instance_variable_get(:@index)).to eq("test_index") + expect(dsl_executor.instance_variable_get(:@query)).to eq("test_query") + expect(dsl_executor.instance_variable_get(:@query_dsl)).to eq(nil) + expect(dsl_executor.instance_variable_get(:@fields)).to eq({ "field1" => "field1_mapped" }) + 
expect(dsl_executor.instance_variable_get(:@result_size)).to eq(10) + expect(dsl_executor.instance_variable_get(:@docinfo_fields)).to eq({ "_id" => "doc_id" }) + expect(dsl_executor.instance_variable_get(:@tag_on_failure)).to eq(["_failure"]) + expect(dsl_executor.instance_variable_get(:@enable_sort)).to eq(true) + expect(dsl_executor.instance_variable_get(:@sort)).to eq("@timestamp:desc") + expect(dsl_executor.instance_variable_get(:@aggregation_fields)).to eq({ "agg1" => "agg1_mapped" }) + expect(dsl_executor.instance_variable_get(:@logger)).to eq(logger) + expect(dsl_executor.instance_variable_get(:@event_decorator)).not_to be_nil + end + end + + describe "data fetch" do + let(:plugin_config) do + { + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => { "response" => "code" }, + "docinfo_fields" => { "_index" => "es_index" }, + "aggregation_fields" => { "bytes_avg" => "bytes_avg_ls_field" } + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_1.json"))) + end + + let(:client) { double(:client) } + + before(:each) do + allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) + if defined?(Elastic::Transport) + allow(client).to receive(:es_transport_client_type).and_return('elastic_transport') + else + allow(client).to receive(:es_transport_client_type).and_return('elasticsearch_transport') + end + allow(client).to receive(:search).and_return(response) + allow(plugin).to receive(:test_connection!) 
+ allow(plugin).to receive(:setup_serverless) + plugin.register + end + + after(:each) do + Thread.current[:filter_elasticsearch_client] = nil + end + + it "should enhance the current event with new data" do + plugin.filter(event) + expect(event.get("code")).to eq(404) + expect(event.get("es_index")).to eq("logstash-2014.08.26") + expect(event.get("bytes_avg_ls_field")["value"]).to eq(294) + end + + it "should receive all necessary params to perform the search" do + expect(client).to receive(:search).with({:q=>"response: 404", :size=>1, :index=>"", :sort=>"@timestamp:desc"}) + plugin.filter(event) + end + + context "when asking to hit specific index" do + + let(:plugin_config) do + { + "index" => "foo*", + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => { "response" => "code" } + } + end + + it "should receive all necessary params to perform the search" do + expect(client).to receive(:search).with({:q=>"response: 404", :size=>1, :index=>"foo*", :sort=>"@timestamp:desc"}) + plugin.filter(event) + end + end + + context "when asking for more than one result" do + + let(:plugin_config) do + { + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => { "response" => "code" }, + "result_size" => 10 + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_10.json"))) + end + + it "should enhance the current event with new data" do + plugin.filter(event) + expect(event.get("code")).to eq([404]*10) + end + end + + context 'when Elasticsearch 7.x gives us a totals object instead of an integer' do + let(:plugin_config) do + { + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => { "response" => "code" }, + "result_size" => 10 + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "elasticsearch_7.x_hits_total_as_object.json"))) + end + + it "should enhance the current event with new data" do 
+ plugin.filter(event) + expect(event.get("[@metadata][total_hits]")).to eq(13476) + end + end + + context "if something wrong happen during connection" do + + before(:each) do + allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) + allow(client).to receive(:search).and_raise("connection exception") + plugin.register + end + + it "tag the event as something happened, but still deliver it" do + expect(plugin.logger).to receive(:warn) + plugin.filter(event) + expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") + end + end + + # Tagging test for positive results + context "Tagging should occur if query returns results" do + let(:plugin_config) do + { + "index" => "foo*", + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "add_tag" => ["tagged"] + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_10.json"))) + end + + it "should tag the current event if results returned" do + plugin.filter(event) + expect(event.to_hash["tags"]).to include("tagged") + end + end + + context "an aggregation search with size 0 that matches" do + let(:plugin_config) do + { + "index" => "foo*", + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "add_tag" => ["tagged"], + "result_size" => 0, + "aggregation_fields" => { "bytes_avg" => "bytes_avg_ls_field" } + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_size0_agg.json"))) + end + + it "should tag the current event" do + plugin.filter(event) + expect(event.get("tags")).to include("tagged") + expect(event.get("bytes_avg_ls_field")["value"]).to eq(294) + end + end + + # Tagging test for negative results + context "Tagging should not occur if query has no results" do + let(:plugin_config) do + { + "index" => "foo*", + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "add_tag" => ["tagged"] + } + end + + 
let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_error.json"))) + end + + it "should not tag the current event" do + plugin.filter(event) + expect(event.to_hash["tags"]).to_not include("tagged") + end + end + context "testing a simple query template" do + let(:plugin_config) do + { + "hosts" => ["localhost:9200"], + "query_template" => File.join(File.dirname(__FILE__), "fixtures", "query_template.json"), + "fields" => { "response" => "code" }, + "result_size" => 1 + } + end + + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_1.json"))) + end + + it "should enhance the current event with new data" do + plugin.filter(event) + expect(event.get("code")).to eq(404) + end + + end + + context "testing a simple index substitution" do + let(:event) { + LogStash::Event.new( + { + "subst_field" => "subst_value" + } + ) + } + let(:plugin_config) do + { + "index" => "foo_%{subst_field}*", + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => { "response" => "code" } + } + end + + it "should receive substituted index name" do + expect(client).to receive(:search).with({:q => "response: 404", :size => 1, :index => "foo_subst_value*", :sort => "@timestamp:desc"}) + plugin.filter(event) + end + end + + context "if query result errored but no exception is thrown" do + let(:response) do + LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_error.json"))) + end + + before(:each) do + allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) + allow(client).to receive(:search).and_return(response) + plugin.register + end + + it "tag the event as something happened, but still deliver it" do + expect(plugin.logger).to receive(:warn) + plugin.filter(event) + expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") + end + end + + context 'with client-level retries' do + 
let(:plugin_config) do + super().merge( + "retry_on_failure" => 3, + "retry_on_status" => [500] + ) + end + end + + context "with custom headers" do + let(:plugin_config) do + { + "query" => "*", + "custom_headers" => { "Custom-Header-1" => "Custom Value 1", "Custom-Header-2" => "Custom Value 2" } + } + end + + let(:plugin) { LogStash::Filters::Elasticsearch.new(plugin_config) } + let(:client_double) { double("client") } + let(:transport_double) { double("transport", options: { transport_options: { headers: plugin_config["custom_headers"] } }) } + + before do + allow(plugin).to receive(:get_client).and_return(client_double) + if defined?(Elastic::Transport) + allow(client_double).to receive(:es_transport_client_type).and_return('elastic_transport') + else + allow(client_double).to receive(:es_transport_client_type).and_return('elasticsearch_transport') + end + allow(client_double).to receive(:client).and_return(transport_double) + end + + it "sets custom headers" do + plugin.register + client = plugin.send(:get_client).client + expect(client.options[:transport_options][:headers]).to match(hash_including(plugin_config["custom_headers"])) + end + end + + context "if query is on nested field" do + let(:plugin_config) do + { + "hosts" => ["localhost:9200"], + "query" => "response: 404", + "fields" => [ ["[geoip][ip]", "ip_address"] ] + } + end + + it "should enhance the current event with new data" do + plugin.filter(event) + expect(event.get("ip_address")).to eq("66.249.73.185") + end + + end + end + + describe "#set_to_event_target" do + it 'is ready to set to `target`' do + expect(dsl_executor.apply_target("path")).to eq("path") + end + + context "when `@target` is nil, default behavior" do + it "sets the value directly to the top-level event field" do + dsl_executor.send(:set_to_event_target, event, "new_field", %w[value1 value2]) + expect(event.get("new_field")).to eq(%w[value1 value2]) + end + end + + context "when @target is defined" do + let(:plugin_config) { + 
super().merge({ "target" => "nested" }) + } + + it "creates a nested structure under the target field" do + dsl_executor.send(:set_to_event_target, event, "new_field", %w[value1 value2]) + expect(event.get("nested")).to eq({ "new_field" => %w[value1 value2] }) + end + + it "overwrites existing target field with new data" do + event.set("nested", { "existing_field" => "existing_value", "new_field" => "value0" }) + dsl_executor.send(:set_to_event_target, event, "new_field", ["value1"]) + expect(event.get("nested")).to eq({ "existing_field" => "existing_value", "new_field" => ["value1"] }) + end + end + end + +end diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index 0da3e51..4df7829 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -14,7 +14,7 @@ let(:esql_executor) { described_class.new(plugin, logger) } context "when initializes" do - it "sets up the ESQL client with correct parameters" do + it "sets up the ESQL executor with correct parameters" do allow(logger).to receive(:debug) expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) expect(esql_executor.instance_variable_get(:@named_params)).to eq([]) @@ -49,9 +49,8 @@ it "executes the query with resolved parameters" do allow(logger).to receive(:debug) expect(event).to receive(:get).with("[bar]").and_return("resolved_value") - expect(client).to receive(:search).with( - { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json', drop_null_columns: true, }, - 'esql') + expect(client).to receive(:esql_query).with( + { body: { query: plugin_config["query"], params: [{ "foo" => "resolved_value" }] }, format: 'json', drop_null_columns: true, }) resolved_params = esql_executor.send(:resolve_parameters, event) esql_executor.send(:execute_query, client, resolved_params) end @@ -72,14 +71,12 @@ allow(plugin).to receive(:decorate) allow(logger).to 
receive(:debug) allow(response).to receive(:headers).and_return({}) - expect(client).to receive(:search).with( + expect(client).to receive(:esql_query).with( { body: { query: plugin_config["query"], params: [{"foo"=>"resolve_me"}] }, format: 'json', drop_null_columns: true, - }, - 'esql' - ).and_return(response) + }).and_return(response) event = LogStash::Event.new({ "hello" => "world", "bar" => "resolve_me" }) expect { esql_executor.process(client, event) }.to_not raise_error @@ -100,7 +97,7 @@ it "tags on query execution failures" do allow(logger).to receive(:debug) - allow(client).to receive(:search).and_raise("Query execution error") + allow(client).to receive(:esql_query).and_raise("Query execution error") expect(logger).to receive(:error).with("Failed to process ES|QL filter", exception: instance_of(RuntimeError)) expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") diff --git a/spec/filters/elasticsearch_spec.rb b/spec/filters/elasticsearch_spec.rb index ad1be19..1478baa 100644 --- a/spec/filters/elasticsearch_spec.rb +++ b/spec/filters/elasticsearch_spec.rb @@ -61,7 +61,7 @@ allow(filter_client).to receive(:serverless?).and_return(true) allow(filter_client).to receive(:client).and_return(es_client) - if elastic_ruby_v8_client_available? 
+ if defined?(Elastic::Transport) allow(es_client).to receive(:info) .with(a_hash_including( :headers => LogStash::Filters::ElasticsearchClient::DEFAULT_EAV_HEADER)) @@ -93,306 +93,6 @@ end end - describe "data fetch" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => { "response" => "code" }, - "docinfo_fields" => { "_index" => "es_index" }, - "aggregation_fields" => { "bytes_avg" => "bytes_avg_ls_field" } - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_1.json"))) - end - - let(:client) { double(:client) } - - before(:each) do - allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) - if elastic_ruby_v8_client_available? - allow(client).to receive(:es_transport_client_type).and_return('elastic_transport') - else - allow(client).to receive(:es_transport_client_type).and_return('elasticsearch_transport') - end - allow(client).to receive(:search).and_return(response) - allow(plugin).to receive(:test_connection!) 
- allow(plugin).to receive(:setup_serverless) - plugin.register - end - - after(:each) do - Thread.current[:filter_elasticsearch_client] = nil - end - - it "should enhance the current event with new data" do - plugin.filter(event) - expect(event.get("code")).to eq(404) - expect(event.get("es_index")).to eq("logstash-2014.08.26") - expect(event.get("bytes_avg_ls_field")["value"]).to eq(294) - end - - it "should receive all necessary params to perform the search" do - expect(client).to receive(:search).with({:q=>"response: 404", :size=>1, :index=>"", :sort=>"@timestamp:desc"}) - plugin.filter(event) - end - - context "when asking to hit specific index" do - - let(:config) do - { - "index" => "foo*", - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => { "response" => "code" } - } - end - - it "should receive all necessary params to perform the search" do - expect(client).to receive(:search).with({:q=>"response: 404", :size=>1, :index=>"foo*", :sort=>"@timestamp:desc"}) - plugin.filter(event) - end - end - - context "when asking for more than one result" do - - let(:config) do - { - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => { "response" => "code" }, - "result_size" => 10 - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_10.json"))) - end - - it "should enhance the current event with new data" do - plugin.filter(event) - expect(event.get("code")).to eq([404]*10) - end - end - - context 'when Elasticsearch 7.x gives us a totals object instead of an integer' do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => { "response" => "code" }, - "result_size" => 10 - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "elasticsearch_7.x_hits_total_as_object.json"))) - end - - it "should enhance the current event with new data" do - 
plugin.filter(event) - expect(event.get("[@metadata][total_hits]")).to eq(13476) - end - end - - context "if something wrong happen during connection" do - - before(:each) do - allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) - allow(client).to receive(:search).and_raise("connection exception") - plugin.register - end - - it "tag the event as something happened, but still deliver it" do - expect(plugin.logger).to receive(:warn) - plugin.filter(event) - expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") - end - end - - # Tagging test for positive results - context "Tagging should occur if query returns results" do - let(:config) do - { - "index" => "foo*", - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "add_tag" => ["tagged"] - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_10.json"))) - end - - it "should tag the current event if results returned" do - plugin.filter(event) - expect(event.to_hash["tags"]).to include("tagged") - end - end - - context "an aggregation search with size 0 that matches" do - let(:config) do - { - "index" => "foo*", - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "add_tag" => ["tagged"], - "result_size" => 0, - "aggregation_fields" => { "bytes_avg" => "bytes_avg_ls_field" } - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_size0_agg.json"))) - end - - it "should tag the current event" do - plugin.filter(event) - expect(event.get("tags")).to include("tagged") - expect(event.get("bytes_avg_ls_field")["value"]).to eq(294) - end - end - - # Tagging test for negative results - context "Tagging should not occur if query has no results" do - let(:config) do - { - "index" => "foo*", - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "add_tag" => ["tagged"] - } - end - - let(:response) do - 
LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_error.json"))) - end - - it "should not tag the current event" do - plugin.filter(event) - expect(event.to_hash["tags"]).to_not include("tagged") - end - end - context "testing a simple query template" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query_template" => File.join(File.dirname(__FILE__), "fixtures", "query_template.json"), - "fields" => { "response" => "code" }, - "result_size" => 1 - } - end - - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_x_1.json"))) - end - - it "should enhance the current event with new data" do - plugin.filter(event) - expect(event.get("code")).to eq(404) - end - - end - - context "testing a simple index substitution" do - let(:event) { - LogStash::Event.new( - { - "subst_field" => "subst_value" - } - ) - } - let(:config) do - { - "index" => "foo_%{subst_field}*", - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => { "response" => "code" } - } - end - - it "should receive substituted index name" do - expect(client).to receive(:search).with({:q => "response: 404", :size => 1, :index => "foo_subst_value*", :sort => "@timestamp:desc"}) - plugin.filter(event) - end - end - - context "if query result errored but no exception is thrown" do - let(:response) do - LogStash::Json.load(File.read(File.join(File.dirname(__FILE__), "fixtures", "request_error.json"))) - end - - before(:each) do - allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) - allow(client).to receive(:search).and_return(response) - plugin.register - end - - it "tag the event as something happened, but still deliver it" do - expect(plugin.logger).to receive(:warn) - plugin.filter(event) - expect(event.to_hash["tags"]).to include("_elasticsearch_lookup_failure") - end - end - - context 'with client-level retries' do - let(:config) do - super().merge( - 
"retry_on_failure" => 3, - "retry_on_status" => [500] - ) - end - end - - context "with custom headers" do - let(:config) do - { - "query" => "*", - "custom_headers" => { "Custom-Header-1" => "Custom Value 1", "Custom-Header-2" => "Custom Value 2" } - } - end - - let(:plugin) { LogStash::Filters::Elasticsearch.new(config) } - let(:client_double) { double("client") } - let(:transport_double) { double("transport", options: { transport_options: { headers: config["custom_headers"] } }) } - - before do - allow(plugin).to receive(:get_client).and_return(client_double) - if elastic_ruby_v8_client_available? - allow(client_double).to receive(:es_transport_client_type).and_return('elastic_transport') - else - allow(client_double).to receive(:es_transport_client_type).and_return('elasticsearch_transport') - end - allow(client_double).to receive(:client).and_return(transport_double) - end - - it "sets custom headers" do - plugin.register - client = plugin.send(:get_client).client - expect(client.options[:transport_options][:headers]).to match(hash_including(config["custom_headers"])) - end - end - - context "if query is on nested field" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query" => "response: 404", - "fields" => [ ["[geoip][ip]", "ip_address"] ] - } - end - - it "should enhance the current event with new data" do - plugin.filter(event) - expect(event.get("ip_address")).to eq("66.249.73.185") - end - - end - end - class StoppableServer attr_reader :port @@ -525,7 +225,7 @@ def wait_receive_request # this spec is a safeguard to trigger an assessment of thread-safety should # we choose a different transport adapter in the future. transport_class = extract_transport(client).options.fetch(:transport_class) - if elastic_ruby_v8_client_available? 
+ if defined?(Elastic::Transport) allow(client).to receive(:es_transport_client_type).and_return("elastic_transport") expect(transport_class).to equal ::Elastic::Transport::Transport::HTTP::Manticore else @@ -845,7 +545,7 @@ def wait_receive_request before(:each) do allow(LogStash::Filters::ElasticsearchClient).to receive(:new).and_return(client) - if elastic_ruby_v8_client_available? + if defined?(Elastic::Transport) allow(client).to receive(:es_transport_client_type).and_return('elastic_transport') else allow(client).to receive(:es_transport_client_type).and_return('elasticsearch_transport') @@ -864,33 +564,6 @@ def wait_receive_request end end - describe "#set_to_event_target" do - - context "when `@target` is nil, default behavior" do - let(:config) {{ }} - - it "sets the value directly to the top-level event field" do - plugin.send(:set_to_event_target, event, "new_field", %w[value1 value2]) - expect(event.get("new_field")).to eq(%w[value1 value2]) - end - end - - context "when @target is defined" do - let(:config) {{ "target" => "nested" }} - - it "creates a nested structure under the target field" do - plugin.send(:set_to_event_target, event, "new_field", %w[value1 value2]) - expect(event.get("nested")).to eq({ "new_field" => %w[value1 value2] }) - end - - it "overwrites existing target field with new data" do - event.set("nested", { "existing_field" => "existing_value", "new_field" => "value0" }) - plugin.send(:set_to_event_target, event, "new_field", ["value1"]) - expect(event.get("nested")).to eq({ "existing_field" => "existing_value", "new_field" => ["value1"] }) - end - end - end - describe "ES|QL" do describe "compatibility" do @@ -962,20 +635,8 @@ def wait_receive_request "query_params" => { "named_params" => [{"a" => "b"}] }, } end - it "raises a config error" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Number of placeholders in `query` and `named_params` do not match/ - end - end - - context "when `named_params` 
doesn't exist but placeholder found" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query" => "FROM my-index | WHERE a = ?a" - } - end - it "raises a config error" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Number of placeholders in `query` and `named_params` do not match/ + it "doesn't complain since not used" do + expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error end end @@ -999,13 +660,6 @@ def extract_transport(client) client.transport.respond_to?(:transport) ? client.transport.transport : client.transport end - def elastic_ruby_v8_client_available? - Elasticsearch::Transport - false - rescue NameError # NameError: uninitialized constant Elasticsearch::Transport if Elastic Ruby client is not available - true - end - class MockResponse attr_reader :code, :headers From b1b00479812d016e6d167f984c70bd57596ff942 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 8 May 2025 23:29:22 -0700 Subject: [PATCH 10/21] Fix the unit test failures. --- spec/filters/elasticsearch_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/filters/elasticsearch_spec.rb b/spec/filters/elasticsearch_spec.rb index 1478baa..c47db67 100644 --- a/spec/filters/elasticsearch_spec.rb +++ b/spec/filters/elasticsearch_spec.rb @@ -625,7 +625,7 @@ def wait_receive_request it "raises a config error" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /`query_params => named_params` is required to be array/ end - end + end if LOGSTASH_VERSION >= '8.17.4' context "when `named_params` exists but not placeholder in the query" do let(:config) do @@ -638,7 +638,7 @@ def wait_receive_request it "doesn't complain since not used" do expect { plugin.send(:validate_esql_query_and_params!) 
}.not_to raise_error end - end + end if LOGSTASH_VERSION >= '8.17.4' context "when placeholder and `named_params` do not match" do let(:config) do @@ -651,7 +651,7 @@ def wait_receive_request it "raises a config error" do expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Placeholder type not found in query/ end - end + end if LOGSTASH_VERSION >= '8.17.4' end def extract_transport(client) From adf100d5f665dcac0bae7c0212ed65f379988782 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 26 May 2025 23:57:54 -0700 Subject: [PATCH 11/21] Introduce query_type option which accepts dsl or esql to define a query shape. Remove multi-depth nested named_params and keep only top-level query_params which aligns with placeholder structure in the ES|QL. --- docs/index.asciidoc | 38 +++++++++------ lib/logstash/filters/elasticsearch.rb | 44 ++++++++---------- .../filters/elasticsearch/esql_executor.rb | 46 +++++++++---------- spec/filters/elasticsearch_esql_spec.rb | 10 ++-- spec/filters/elasticsearch_spec.rb | 28 ++++++----- .../integration/elasticsearch_esql_spec.rb | 9 ++-- 6 files changed, 90 insertions(+), 85 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 884b95a..a2c8380 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -124,11 +124,11 @@ The `monitoring` permission at cluster level is necessary to perform periodic co To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer. -To configure ES|QL query in the plugin, set your ES|QL query in the `query` parameter. +To configure {esql} query in the plugin, set your {esql} query in the `query` parameter. -IMPORTANT: We recommend understanding https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-limitations.html[ES|QL current limitations] before using it in production environments. 
+IMPORTANT: We recommend understanding {ref}/esql-limitations.html[{esql} current limitations] before using it in production environments. -The following is a basic ES|QL query that sets food name to transaction event based on upstream event's food ID: +The following is a basic {esql} query that sets the food name to transaction event based on upstream event's food ID: [source, ruby] filter { elasticsearch { @@ -139,7 +139,7 @@ The following is a basic ES|QL query that sets food name to transaction event ba | WHERE id = "?food_id" ' query_params => { - named_params => ["food_id" => "[food][id]"] + "food_id" => "[food][id]" } fields => { "food.name" => "food_name" } } @@ -147,11 +147,11 @@ The following is a basic ES|QL query that sets food name to transaction event ba Set `config.support_escapes: true` in `logstash.yml` if you need to escape special chars in the query. -In the result event, the plugin sets total result size in `[@metadata][total_hits]` field. It also limits the result size to 1 when `FROM` query is used. +In the result event, the plugin sets total result size in `[@metadata][total_values]` field. It also limits the result size to 1 when `FROM` query is used. -NOTE: If `FROM` execution command used and not `LIMIT` is set, the plugin attaches `| LIMIT 1`. +NOTE: If `LIMIT` isn't set, the plugin attaches `| LIMIT 1`. -For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{es} ES|QL documentation]. +For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{esql} documentation]. 
[id="plugins-{type}s-{plugin}-options"] ==== Elasticsearch Filter Configuration Options @@ -178,6 +178,7 @@ NOTE: As of version `4.0.0` of this plugin, a number of previously deprecated se | <> |<>|No | <> |<>|No | <> |<>|No +| <> |<>, one of `["dsl", "esql"]`|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -376,22 +377,29 @@ environment variables e.g. `proxy => '${LS_PROXY:}'`. * There is no default value for this setting. The query to be executed. -Accepted query shape is DSL query string or ES|QL. +The accepted query shape is DSL query string or ES|QL. For the DSL query string, use either `query` or `query_template`. Read the {ref}/query-dsl-query-string-query.html[{es} query string documentation] or {ref}/esql.html[{es} ES|QL documentation] for more information. +[id="plugins-{type}s-{plugin}-query_type"] +===== `query_type` + +* Value can be `dsl` or `esql` +* Default value is `dsl` + +Defines the <> shape. +When `dsl`, the query shape must be valid {es} JSON-style string. +When `esql`, the query shape must be a valid {esql} string and `index`, `query_template` and `sort` parameters are not allowed. + [id="plugins-{type}s-{plugin}-query_params"] ===== `query_params` -Parameters to send to {es} together with <>. -Accepted options: -[cols="2,1,3",options="header"] -|=== -|Option name |Default value | Description +* The value type is <> +* There is no default value for this setting -|`named_params` |[] | List of named parameters and their matches used in the `query` -|=== +Named parameters in {esql} to send to {es} together with <>. +Visit {ref}/esql-rest.html#esql-rest-params[passing parameters to query page] for more information. 
[id="plugins-{type}s-{plugin}-query_template"] ===== `query_template` diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index 177c424..e89f18b 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -27,7 +27,10 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # Field substitution (e.g. `index-name-%{date_field}`) is available config :index, :validate => :string, :default => "" - # Elasticsearch query string. This can be in DSL or ES|QL query shape. + # A type of Elasticsearch query, provided by @query. + config :query_type, :validate => %w[esql dsl], :default => "dsl" + + # Elasticsearch query string. This can be in DSL or ES|QL query shape defined by @query_type. # Read the Elasticsearch query string documentation. # DSL: https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl-query-string-query.html#query-string-syntax # ES|QL: https://www.elastic.co/guide/en/elasticsearch/reference/current/esql.html @@ -139,12 +142,12 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # What status codes to retry on? 
config :retry_on_status, :validate => :number, :list => true, :default => [500, 502, 503, 504] - # params to send to ES|QL query, naming params preferred + # named placeholders in ES|QL query # example, - # if query is "FROM my-index | WHERE some_type = ?type" - # named params can be applied as following via query_params: + # if the query is "FROM my-index | WHERE some_type = ?type" + # named placeholders can be applied as the following in query_params: # query_params => { - # "named_params" => [ {"type" => "%{[type]}"}] + # "type" => "%{[type]}" # } config :query_params, :validate => :hash, :default => {} @@ -182,8 +185,7 @@ def self.validate_value(value, validator) attr_reader :query_dsl def register - query_type = resolve_query_type - case query_type + case @query_type when "esql" invalid_params_with_esql = original_params.keys & %w(index query_template sort docinfo_fields aggregation_fields enable_sort result_size) raise LogStash::ConfigurationError, "Configured #{invalid_params_with_esql} params cannot be used with ES|QL query" if invalid_params_with_esql.any? @@ -204,7 +206,7 @@ def register @hosts = Array(@hosts).map { |host| host.to_s } # potential SafeURI#to_s test_connection! - validate_es_for_esql_support! if query_type == "esql" + validate_es_for_esql_support! if @query_type == "esql" setup_serverless if get_client.es_transport_client_type == "elasticsearch_transport" require_relative "elasticsearch/patches/_elasticsearch_transport_http_manticore" @@ -216,11 +218,11 @@ def filter(event) end # def filter def decorate(event) - # Elasticsearch class has an access for `filter_matched` + # this Elasticsearch class has access to `filter_matched` filter_matched(event) end - # public only to be reuse in testing + # public only to be reused in testing def prepare_user_agent os_name = java.lang.System.getProperty('os.name') os_version = java.lang.System.getProperty('os.version') @@ -426,10 +428,6 @@ def setup_ssl_params! 
params['ssl_enabled'] = @ssl_enabled ||= Array(@hosts).all? { |host| host && host.to_s.start_with?("https") } end - def resolve_query_type - @query&.strip&.match?(/\A(?:FROM|ROW|SHOW)/) ? "esql": "dsl" - end - def validate_dsl_query_settings! #Load query if it exists if @query_template @@ -451,6 +449,10 @@ def validate_query_settings if @query && @query_template raise LogStash::ConfigurationError, "Both `query` and `query_template` are set. Use either `query` or `query_template`." end + + if original_params.keys.include?("query_params") + raise LogStash::ConfigurationError, "`query_params` is not allowed when `query_type => 'dsl'`." + end end def validate_ls_version_for_esql_support! @@ -460,20 +462,12 @@ def validate_ls_version_for_esql_support! end def validate_esql_query_and_params! - accepted_query_params = %w(named_params) - original_query_params = original_params["query_params"] ||= {} - invalid_query_params = original_query_params.keys - accepted_query_params - raise LogStash::ConfigurationError, "#{accepted_query_params} option(s) accepted in `query_params`, but found #{invalid_query_params} invalid option(s)" if invalid_query_params.any? - - is_named_params_array = original_query_params["named_params"] ? original_query_params["named_params"].class.eql?(Array) : true - raise LogStash::ConfigurationError, "`query_params => named_params` is required to be array" unless is_named_params_array - - named_params = original_query_params["named_params"] ||= [] - named_params_keys = named_params.map(&:keys).flatten + illegal_keys = @query_params.keys.reject {|k| k[/^[a-z_][a-z0-9_]*$/] } + raise LogStash::ConfigurationError, "Illegal #{illegal_keys} placeholder names in `query_params`" if illegal_keys.any? 
placeholders = @query.scan(/(?<=[?])[a-z_][a-z0-9_]*/i) placeholders.each do |placeholder| - raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless named_params_keys.include?(placeholder) + raise LogStash::ConfigurationError, "Placeholder #{placeholder} not found in query" unless @query_params.include?(placeholder) end end diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index ef8fce3..f035666 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -10,16 +10,16 @@ def initialize(plugin, logger) @event_decorator = plugin.method(:decorate) @query = plugin.params["query"] - if @query.strip.start_with?("FROM") && !@query.match?(/\|\s*LIMIT/) + unless @query.match?(/\|\s*LIMIT/) @logger.warn("ES|QL query doesn't contain LIMIT, adding `| LIMIT 1` to optimize the performance") @query.concat(' | LIMIT 1') end - query_params = plugin.params["query_params"] || {} - @named_params = query_params["named_params"] || [] + @query_params = plugin.params["query_params"] || {} + @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] - @logger.debug("ES|QL query executor initialized with ", query: @query, named_params: @named_params) + @logger.debug("ES|QL query executor initialized with ", query: @query, query_params: @query_params) @target_field = plugin.params["target"] if @target_field @@ -30,7 +30,7 @@ def self.apply_target(path); path; end end def process(client, event) - resolved_params = @named_params&.any? ? resolve_parameters(event) : [] + resolved_params = @query_params&.any? ? 
resolve_parameters(event) : [] response = execute_query(client, resolved_params) inform_warning(response) process_response(event, response) @@ -43,45 +43,41 @@ def process(client, event) private def resolve_parameters(event) - @named_params.map do |entry| - entry.each_with_object({}) do |(key, value), new_entry| - begin - resolved_value = event.get(value) - @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") - new_entry[key] = resolved_value - rescue => e - # catches invalid field reference - @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) - raise - end + @query_params.each_with_object([]) do |(key, value), resolved_parameters| + begin + resolved_value = event.get(value) + @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") + resolved_parameters << { key => resolved_value } if resolved_value + rescue => e + # catches invalid field reference + @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) + raise end end end def execute_query(client, params) - # debug logs may help to check what query shape the plugin is sending to ES + # debug logs may help to check what query shape the plugin is sending to ES @logger.debug("Executing ES|QL query", query: @query, params: params) client.esql_query({ body: { query: @query, params: params }, format: 'json', drop_null_columns: true }) end def process_response(event, response) - columns = response['columns'].freeze - values = response['values'].freeze + columns = response['columns']&.freeze || [] + values = response['values']&.freeze || [] if values.nil? || values.size == 0 @logger.debug("Empty ES|QL query result", columns: columns, values: values) return end - # this shouldn't never happen but just in case not crash the plugin + # this shouldn't happen but just in case not crash the plugin if columns.nil? 
|| columns.size == 0 @logger.error("No columns exist but received values", columns: columns, values: values) return end - # TODO: do we need to set `total_hits` to target? - # if not, how do we resolve conflict with existing es-input total_hits field? - # FYI: with DSL it stores in `[@metadata][total_hits]` - event.set("[@metadata][total_hits]", values.size) + event.set("[@metadata][total_values]", values.size) + # @logger.debug("Executing ES|QL values size ", values.size) add_requested_fields(event, columns, values) end @@ -101,7 +97,7 @@ def add_requested_fields(event, columns, values) end end - # if @target is defined, creates a nested structure to inject result into target field + # if @target is defined, creates a nested structure to inject a result into the target field # if not defined, directly sets to the top-level event field # @param event [LogStash::Event] # @param new_key [String] name of the field to set diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index 4df7829..c06d23c 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -8,6 +8,7 @@ let(:plugin) { LogStash::Filters::Elasticsearch.new(plugin_config) } let(:plugin_config) do { + "query_type" => "esql", "query" => "FROM test-index | STATS count() BY field | LIMIT 10" } end @@ -17,7 +18,7 @@ it "sets up the ESQL executor with correct parameters" do allow(logger).to receive(:debug) expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) - expect(esql_executor.instance_variable_get(:@named_params)).to eq([]) + expect(esql_executor.instance_variable_get(:@query_params)).to eq({}) expect(esql_executor.instance_variable_get(:@fields)).to eq({}) expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) end @@ -28,8 +29,9 @@ super() .merge( { + "query_type" => "esql", "query" => "FROM my-index | WHERE field = ?foo | LIMIT 5", - 
"query_params" => { "named_params" => [{ "foo" => "[bar]" }] }, + "query_params" => { "foo" => "[bar]" }, "fields" => { "val" => "val_new", "odd" => "new_odd" } }) } @@ -62,7 +64,7 @@ end it "processes the response and adds metadata" do - expect(event).to receive(:set).with("[@metadata][total_hits]", 1) + expect(event).to receive(:set).with("[@metadata][total_values]", 1) expect(event).to receive(:set).with("val_new", "bar") esql_executor.send(:process_response, event, response) end @@ -80,7 +82,7 @@ event = LogStash::Event.new({ "hello" => "world", "bar" => "resolve_me" }) expect { esql_executor.process(client, event) }.to_not raise_error - expect(event.get("[@metadata][total_hits]")).to eq(1) + expect(event.get("[@metadata][total_values]")).to eq(1) expect(event.get("hello")).to eq("world") expect(event.get("val_new")).to eq("bar") expect(event.get("new_odd")).to be_nil # filters out non-exist fields diff --git a/spec/filters/elasticsearch_spec.rb b/spec/filters/elasticsearch_spec.rb index c47db67..653e6eb 100644 --- a/spec/filters/elasticsearch_spec.rb +++ b/spec/filters/elasticsearch_spec.rb @@ -567,7 +567,7 @@ def wait_receive_request describe "ES|QL" do describe "compatibility" do - let(:config) {{ "hosts" => ["localhost:9200"], "query" => "FROM my-index" }} + let(:config) {{ "hosts" => ["localhost:9200"], "query_type" => "esql", "query" => "FROM my-index" }} context "when LS doesn't support ES|QL" do let(:ls_version) { LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION } @@ -599,6 +599,7 @@ def wait_receive_request let(:config) do { "hosts" => ["localhost:9200"], + "query_type" => "esql", "query" => "FROM my-index", "index" => "some-index", "docinfo_fields" => { "_index" => "es_index" }, @@ -614,38 +615,41 @@ def wait_receive_request end end - context "when `named_params` isn't array" do + context "when placeholder doesn't exist in the query" do let(:config) do { "hosts" => ["localhost:9200"], + "query_type" => "esql", "query" => "FROM my-index", - 
"query_params" => { "named_params" => {"a" => "b"} }, + "query_params" => { "a" => "b" }, } end - it "raises a config error" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, /`query_params => named_params` is required to be array/ + it "doesn't complain since not used" do + expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error end end if LOGSTASH_VERSION >= '8.17.4' - context "when `named_params` exists but not placeholder in the query" do + context "when illegal placeholders appear" do let(:config) do { "hosts" => ["localhost:9200"], - "query" => "FROM my-index", - "query_params" => { "named_params" => [{"a" => "b"}] }, + "query_type" => "esql", + "query" => "FROM my-index | WHERE type = ?type", + "query_params" => { "1abcd_efg1" => "1", "$abcd_efg1" => 2, "type" => 3 }, } end - it "doesn't complain since not used" do - expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, 'Illegal ["1abcd_efg1", "$abcd_efg1"] placeholder names in `query_params`' end end if LOGSTASH_VERSION >= '8.17.4' - context "when placeholder and `named_params` do not match" do + context "when placeholder and `query_params` do not match" do let(:config) do { "hosts" => ["localhost:9200"], + "query_type" => "esql", "query" => "FROM my-index | WHERE type = ?type", - "query_params" => { "named_params" => [{"b" => "c"}] }, + "query_params" => {"b" => "c"}, } end it "raises a config error" do diff --git a/spec/filters/integration/elasticsearch_esql_spec.rb b/spec/filters/integration/elasticsearch_esql_spec.rb index 0960b64..b8eafea 100644 --- a/spec/filters/integration/elasticsearch_esql_spec.rb +++ b/spec/filters/integration/elasticsearch_esql_spec.rb @@ -23,7 +23,8 @@ end let(:config) do { - "hosts" => ES_HOSTS + "hosts" => ES_HOSTS, + "query_type" => "esql" } end let(:event) { LogStash::Event.new({}) } @@ -35,7 +36,7 @@ 
is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION) skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client - # Skip tests if ES version doesn't support ES||QL + # Skip tests if the ES version doesn't support ES|QL es_client = SECURE_INTEGRATION ? Elasticsearch::Client.new(hosts: ES_HOSTS, user: 'tests', password: 'Tests123') : Elasticsearch::Client.new(hosts: ES_HOSTS) @@ -70,7 +71,7 @@ shared_examples "ESQL query execution" do |expected_count, fields| it "processes the event" do plugin.filter(event) - expect(event.get("[@metadata][total_hits]")).to eq(expected_count) + expect(event.get("[@metadata][total_values]")).to eq(expected_count) fields&.each do | old_key, new_key | expect(event.get(new_key)).not_to be(nil) end @@ -128,7 +129,7 @@ it "processes the event" do plugin.filter(event) - expect(event.get("[@metadata][total_hits]")).to eq(6) + expect(event.get("[@metadata][total_values]")).to eq(6) expect(event.get("target_message").size).to eq(6) expect(event.get("target_count").size).to eq(5) end From 77dc303e1945d375b0b8fcb7c1dc394d45001dae Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 27 May 2025 00:48:58 -0700 Subject: [PATCH 12/21] Separate event referenced and static valued fields at initialization of the ESQL executor.
--- .../filters/elasticsearch/esql_executor.rb | 14 ++++++++------ spec/filters/elasticsearch_esql_spec.rb | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index f035666..2c58ef9 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -15,11 +15,12 @@ def initialize(plugin, logger) @query.concat(' | LIMIT 1') end - @query_params = plugin.params["query_params"] || {} - + query_params = plugin.params["query_params"] || {} + @referenced_params, static_valued_params = query_params.partition { |_, v| v.kind_of?(String) && v.match?(/^\[.*\]$/) }.map(&:to_h) + @static_params = static_valued_params.map { |k, v| { k => v } } @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] - @logger.debug("ES|QL query executor initialized with ", query: @query, query_params: @query_params) + @logger.debug("ES|QL query executor initialized with ", query: @query, query_params: query_params) @target_field = plugin.params["target"] if @target_field @@ -30,7 +31,8 @@ def self.apply_target(path); path; end end def process(client, event) - resolved_params = @query_params&.any? ? resolve_parameters(event) : [] + resolved_params = @referenced_params&.any? ? resolve_parameters(event) : [] + resolved_params.concat(@static_params) if @static_params&.any? 
response = execute_query(client, resolved_params) inform_warning(response) process_response(event, response) @@ -43,7 +45,7 @@ def process(client, event) private def resolve_parameters(event) - @query_params.each_with_object([]) do |(key, value), resolved_parameters| + @referenced_params.each_with_object([]) do |(key, value), resolved_parameters| begin resolved_value = event.get(value) @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") @@ -77,7 +79,7 @@ def process_response(event, response) end event.set("[@metadata][total_values]", values.size) - # @logger.debug("Executing ES|QL values size ", values.size) + @logger.debug("Executing ES|QL values size ", values.size) add_requested_fields(event, columns, values) end diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index c06d23c..c5fa962 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -18,7 +18,8 @@ it "sets up the ESQL executor with correct parameters" do allow(logger).to receive(:debug) expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) - expect(esql_executor.instance_variable_get(:@query_params)).to eq({}) + expect(esql_executor.instance_variable_get(:@referenced_params)).to eq({}) + expect(esql_executor.instance_variable_get(:@static_params)).to eq([]) expect(esql_executor.instance_variable_get(:@fields)).to eq({}) expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) end From 9408789eff33b98a71c44f749b28362c46a05914 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 27 May 2025 13:40:41 -0700 Subject: [PATCH 13/21] query_params now supports both Array and Hash types. 
--- docs/index.asciidoc | 4 +- lib/logstash/filters/elasticsearch.rb | 24 +++-- .../filters/elasticsearch/esql_executor.rb | 8 +- spec/filters/elasticsearch_esql_spec.rb | 38 ++++++++ spec/filters/elasticsearch_spec.rb | 95 ++++++++++++++----- 5 files changed, 132 insertions(+), 37 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index a2c8380..bc1bd52 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -179,7 +179,7 @@ NOTE: As of version `4.0.0` of this plugin, a number of previously deprecated se | <> |<>|No | <> |<>|No | <> |<>, one of `["dsl", "esql"]`|No -| <> |<>|No +| <> |<> or <>|No | <> |<>|No | <> |<>|No | <> |<>|No @@ -395,7 +395,7 @@ When `esql`, the query shape must be a valid {esql} string and `index`, `query_t [id="plugins-{type}s-{plugin}-query_params"] ===== `query_params` -* The value type is <> +* The value type is <> or <>. When an array provided, the array elements are pairs of `key` and `value`. * There is no default value for this setting Named parameters in {esql} to send to {es} together with <>. diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index e89f18b..a74e60b 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -144,12 +144,13 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # named placeholders in ES|QL query # example, - # if the query is "FROM my-index | WHERE some_type = ?type" + # if the query is "FROM my-index | WHERE some_type = ?type AND depth > ?min_depth" # named placeholders can be applied as the following in query_params: - # query_params => { - # "type" => "%{[type]}" - # } - config :query_params, :validate => :hash, :default => {} + # query_params => [ + # {"type" => "%{[type]}"} + # {"min_depth" => "%{[depth]}"} + # ] + config :query_params, :validate => :array, :default => [] config :ssl, :obsolete => "Set 'ssl_enabled' instead." 
config :ca_file, :obsolete => "Set 'ssl_certificate_authorities' instead." @@ -462,8 +463,19 @@ def validate_ls_version_for_esql_support! end def validate_esql_query_and_params! + # If Array, validate that query_params needs to contain only single-entry hashes, convert it to a Hash + if @query_params.kind_of?(Array) + illegal_entries = @query_params.reject {|e| e.kind_of?(Hash) && e.size == 1 } + raise LogStash::ConfigurationError, "`query_params` must contain only single-entry hashes. Illegal placeholders: #{illegal_entries}" if illegal_entries.any? + + @query_params = @query_params.reduce({}, :merge) + end + illegal_keys = @query_params.keys.reject {|k| k[/^[a-z_][a-z0-9_]*$/] } - raise LogStash::ConfigurationError, "Illegal #{illegal_keys} placeholder names in `query_params`" if illegal_keys.any? + if illegal_keys.any? + message = "Illegal #{illegal_keys} placeholder names in `query_params`. A valid parameter name starts with a letter and contains letters, digits and underscores only;" + raise LogStash::ConfigurationError, message + end placeholders = @query.scan(/(?<=[?])[a-z_][a-z0-9_]*/i) placeholders.each do |placeholder| diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 2c58ef9..c9e6f89 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -15,8 +15,10 @@ def initialize(plugin, logger) @query.concat(' | LIMIT 1') end - query_params = plugin.params["query_params"] || {} - @referenced_params, static_valued_params = query_params.partition { |_, v| v.kind_of?(String) && v.match?(/^\[.*\]$/) }.map(&:to_h) + query_params = plugin.query_params || {} + reference_valued_params, static_valued_params = query_params.partition { |_, v| v.kind_of?(String) && v.match?(/^\[.*\]$/) } + @referenced_params = reference_valued_params&.to_h + # keep static params as an array of hashes to attach to the ES|QL api param easily 
@static_params = static_valued_params.map { |k, v| { k => v } } @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] @@ -49,7 +51,7 @@ def resolve_parameters(event) begin resolved_value = event.get(value) @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") - resolved_parameters << { key => resolved_value } if resolved_value + resolved_parameters << { key => resolved_value } rescue => e # catches invalid field reference @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index c5fa962..f30ec7e 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -108,4 +108,42 @@ end end + describe "#query placeholders" do + before(:each) do + allow(logger).to receive(:debug) + plugin.send(:validate_esql_query_and_params!) + end + + context "when `query_params` is an Array contains {key => val} entries" do + let(:plugin_config) { + super() + .merge( + { + "query" => "FROM my-index | LIMIT 1", + "query_params" => [{ "a" => "b" }, { "c" => "[b]" }, { "e" => 1 }, { "f" => "[g]" }], + }) + } + + it "separates references and static params at initialization" do + expect(esql_executor.instance_variable_get(:@referenced_params)).to eq({"c" => "[b]", "f" => "[g]"}) + expect(esql_executor.instance_variable_get(:@static_params)).to eq([{"a" => "b"}, {"e" => 1}]) + end + end + + context "when `query_params` is a Hash" do + let(:plugin_config) { + super() + .merge( + { + "query" => "FROM my-index | LIMIT 1", + "query_params" => { "a" => "b", "c" => "[b]", "e" => 1, "f" => "[g]" }, + }) + } + + it "separates references and static params at initialization" do + expect(esql_executor.instance_variable_get(:@referenced_params)).to eq({"c" => "[b]", "f" => "[g]"}) + expect(esql_executor.instance_variable_get(:@static_params)).to eq([{"a" => "b"}, 
{"e" => 1}]) + end + end + end end if LOGSTASH_VERSION >= LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION \ No newline at end of file diff --git a/spec/filters/elasticsearch_spec.rb b/spec/filters/elasticsearch_spec.rb index 653e6eb..4c1deb4 100644 --- a/spec/filters/elasticsearch_spec.rb +++ b/spec/filters/elasticsearch_spec.rb @@ -615,45 +615,88 @@ def wait_receive_request end end - context "when placeholder doesn't exist in the query" do + describe "#query placeholder" do let(:config) do { "hosts" => ["localhost:9200"], - "query_type" => "esql", - "query" => "FROM my-index", - "query_params" => { "a" => "b" }, + "query_type" => "esql" } end - it "doesn't complain since not used" do - expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error + + context "when query placeholder doesn't exist in the query" do + let(:config) { + super() + .merge( + { + "query" => "FROM my-index", + "query_params" => { "a" => "b" }, + }) + } + + it "doesn't complain since not used" do + expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error + end end - end if LOGSTASH_VERSION >= '8.17.4' - context "when illegal placeholders appear" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query_type" => "esql", - "query" => "FROM my-index | WHERE type = ?type", - "query_params" => { "1abcd_efg1" => "1", "$abcd_efg1" => 2, "type" => 3 }, + context "when illegal placeholders appear" do + let(:config) { + super() + .merge( + { + "query" => "FROM my-index | WHERE type = ?type", + "query_params" => { "1abcd_efg1" => "1", "$abcd_efg1" => 2, "type" => 3 }, + }) } + it "raises a config error" do + message = 'Illegal ["1abcd_efg1", "$abcd_efg1"] placeholder names in `query_params`. 
A valid parameter name starts with a letter and contains letters, digits and underscores only;' + expect { plugin.register }.to raise_error LogStash::ConfigurationError, message + end end - it "raises a config error" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, 'Illegal ["1abcd_efg1", "$abcd_efg1"] placeholder names in `query_params`' + + context "when query placeholders and `query_params` do not match" do + let(:config) { + super() + .merge( + { + "query" => "FROM my-index | WHERE type = ?type", + "query_params" => {"b" => "c"}, + }) + } + it "raises a config error" do + expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Placeholder type not found in query/ + end end - end if LOGSTASH_VERSION >= '8.17.4' - context "when placeholder and `query_params` do not match" do - let(:config) do - { - "hosts" => ["localhost:9200"], - "query_type" => "esql", - "query" => "FROM my-index | WHERE type = ?type", - "query_params" => {"b" => "c"}, + context "when `query_params` is an Array contains {key => val} entries" do + let(:config) { + super() + .merge( + { + "query" => "FROM my-index", + "query_params" => [{ "a" => "b" }, { "c" => "[b]" }, { "e" => 1 }, { "f" => "[g]" }], + }) } + + it "doesn't complain since not used" do + expect { plugin.send(:validate_esql_query_and_params!) }.not_to raise_error + expect(plugin.query_params).to eq({ "a" => "b", "c" => "[b]", "e" => 1, "f" => "[g]" }) + end end - it "raises a config error" do - expect { plugin.register }.to raise_error LogStash::ConfigurationError, /Placeholder type not found in query/ + + context "when `query_params` is a Hash" do + let(:config) { + super() + .merge( + { + "query" => "FROM my-index", + "query_params" => { "a" => "b", "c" => "[b]", "e" => 1, "f" => "[g]" }, + }) + } + + it "doesn't complain since not used" do + expect { plugin.send(:validate_esql_query_and_params!) 
}.not_to raise_error + expect(plugin.query_params).to eq({ "a" => "b", "c" => "[b]", "e" => 1, "f" => "[g]" }) + end end end if LOGSTASH_VERSION >= '8.17.4' end From 68c0dfc935d4aa4765936be02794a3db50df2492 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Tue, 27 May 2025 15:42:49 -0700 Subject: [PATCH 14/21] ES|QL executor parameters resolution logic simplification. --- lib/logstash/filters/elasticsearch/esql_executor.rb | 7 +++---- spec/filters/elasticsearch_esql_spec.rb | 1 - 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index c9e6f89..304c017 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -47,15 +47,14 @@ def process(client, event) private def resolve_parameters(event) - @referenced_params.each_with_object([]) do |(key, value), resolved_parameters| + @referenced_params.map do |key, value| begin resolved_value = event.get(value) @logger.debug("Resolved value for #{key}: #{resolved_value}, its class: #{resolved_value.class}") - resolved_parameters << { key => resolved_value } + { key => resolved_value } rescue => e # catches invalid field reference - @logger.error("Failed to resolve parameter", key: key, value: value, error: e.message) - raise + raise "Failed to resolve parameter `#{key}` with `#{value}`. 
Error: #{e.message}" end end end diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index f30ec7e..2ce00e2 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -92,7 +92,6 @@ it "tags on plugin failures" do expect(event).to receive(:get).with("[bar]").and_raise("Event#get Invalid FieldReference error") - expect(logger).to receive(:error).with("Failed to resolve parameter", {:error=>"Event#get Invalid FieldReference error", :key=>"foo", :value=>"[bar]"}) expect(logger).to receive(:error).with("Failed to process ES|QL filter", exception: instance_of(RuntimeError)) expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") esql_executor.process(client, event) From f23be7e2db5f5388d26ab40abbf28f623cdfaa96 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Wed, 28 May 2025 11:04:14 -0700 Subject: [PATCH 15/21] Add tech preview section under ESQL. --- docs/index.asciidoc | 7 +++++++ lib/logstash/filters/elasticsearch.rb | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index bc1bd52..2116c9b 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -120,6 +120,13 @@ The `monitoring` permission at cluster level is necessary to perform periodic co [id="plugins-{type}s-{plugin}-esql"] ==== {esql} support + +.Technical Preview +**** +The {esql} feature that allows using ES|QL queries with this plugin is in Technical Preview. +Configuration options and implementation details are subject to change in minor releases without being preceded by deprecation warnings. +**** + {es} Query Language ({esql}) provides a SQL-like interface for querying your {es} data. To use {esql}, this plugin needs to be installed in {ls} 8.17.4 or newer, and must be connected to {es} 8.11 or newer. 
diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index a74e60b..fbc17d6 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -133,7 +133,7 @@ class LogStash::Filters::Elasticsearch < LogStash::Filters::Base # Tags the event on failure to look up geo information. This can be used in later analysis. config :tag_on_failure, :validate => :array, :default => ["_elasticsearch_lookup_failure"] - # If set, the the result set will be nested under the target field + # If set, the result set will be nested under the target field config :target, :validate => :field_reference # How many times to retry on failure? From fe5074cb4d618a40db18cacbaee76943c45b32ec Mon Sep 17 00:00:00 2001 From: Mashhur Date: Mon, 9 Jun 2025 23:23:26 -0700 Subject: [PATCH 16/21] Place the query results based on the target specified. If not specified, first result will be set to event's top level. --- CHANGELOG.md | 3 + docs/index.asciidoc | 72 ++++++++++- lib/logstash/filters/elasticsearch.rb | 2 +- .../filters/elasticsearch/esql_executor.rb | 120 +++++++++++++----- logstash-filter-elasticsearch.gemspec | 2 +- spec/filters/elasticsearch_esql_spec.rb | 79 ++++++++++-- 6 files changed, 236 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8522803..e6d1cec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 4.3.0 + - ES|QL support [#194](https://github.com/logstash-plugins/logstash-filter-elasticsearch/pull/194) + ## 4.2.0 - Add `target` configuration option to store the result into it [#196](https://github.com/logstash-plugins/logstash-filter-elasticsearch/pull/196) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 2116c9b..a4aea43 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -154,9 +154,74 @@ The following is a basic {esql} query that sets the food name to transaction eve Set `config.support_escapes: true` in `logstash.yml` if you need to escape 
special chars in the query. -In the result event, the plugin sets total result size in `[@metadata][total_values]` field. It also limits the result size to 1 when `FROM` query is used. +In the result event, the plugin sets total result size in `[@metadata][total_values]` field. + +[id="plugins-{type}s-{plugin}-esql-event-mapping"] +===== Mapping {esql} result to {ls} event +{esql} returns query results in a structured tabular format, where data is organized into _columns_ (fields) and _values_ (entries). +The plugin maps each value entry to an event, populating corresponding fields. +For example, a query might produce a table like: + +[cols="2,1,1,1,2",options="header"] +|=== +|`timestamp` |`user_id` | `action` | `status.code` | `status.desc` + +|2025-04-10T12:00:00 |123 |login |200 | Success +|2025-04-10T12:05:00 |456 |purchase |403 | Forbidden (unauthorized user) +|=== + +For this case, the plugin creates two JSON-like objects as follows: +[source, json] +[ + { + "timestamp": "2025-04-10T12:00:00", + "user_id": 123, + "action": "login", + "status": { + "code": 200, + "desc": "Success" + } + }, + { + "timestamp": "2025-04-10T12:05:00", + "user_id": 456, + "action": "purchase", + "status": { + "code": 403, + "desc": "Forbidden (unauthorized user)" + } + } +] + +NOTE: If your index has a mapping with sub-objects where `status.code` and `status.desc` are actually dotted fields, they appear in {ls} events as a nested structure. + +The plugin uses the `target` field to determine where to place the results. +If `target` is not defined, the plugin places _only_ the first result at the root of the event. + +[id="plugins-{type}s-{plugin}-esql-multifields"] +===== Conflict on multi-fields -NOTE: If `LIMIT` isn't set, the plugin attaches `| LIMIT 1`.
+{esql} query fetches all parent and sub-fields if your {es} index has https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/multi-fields[multi-fields] or https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/subobjects[subobjects]. +Since {ls} events cannot contain a parent field's concrete value and sub-field values together, the plugin ignores sub-fields with a warning and includes the parent. +We recommend using the `RENAME` (or `DROP` to avoid the warning) keyword in your {esql} query to explicitly rename the fields to include sub-fields into the event. + +This is a common occurrence if your template or mapping follows the pattern of always indexing strings as "text" (`field`) + "keyword" (`field.keyword`) multi-field. +In this case it's recommended to do `KEEP field` if the string is identical and there is only one subfield as the engine will optimize and retrieve the keyword, otherwise you can do `KEEP field.keyword | RENAME field.keyword as field`. + +To illustrate the situation with an example, assuming your mapping has a `time` field with `time.min` and `time.max` sub-fields as follows: +[source, ruby] + "properties": { + "time": { "type": "long" }, + "time.min": { "type": "long" }, + "time.max": { "type": "long" } + } + +The {esql} result will contain all three fields but the plugin cannot map them into a {ls} event. +To avoid this, you can use the `RENAME` keyword to rename the `time` parent field to get all three fields with unique names. +[source, ruby] + ... + query => 'FROM my-index | RENAME time AS time.current' + ... For comprehensive ES|QL syntax reference and best practices, see the https://www.elastic.co/guide/en/elasticsearch/reference/current/esql-syntax.html[{esql} documentation]. @@ -603,8 +668,9 @@ Tags the event on failure to look up previous log event information. This can be Define the target field for placing the result data. If this setting is omitted, the target will be the root (top level) of the event.
+It is highly recommended to set this option when using `query_type=>'esql'` so that all query results are included in the event. -The destination fields specified in <>, <>, and <> are relative to this target. +When `query_type=>'dsl'`, the destination fields specified in <>, <>, and <> are relative to this target. For example, if you want the data to be put in the `operation` field: [source,ruby] diff --git a/lib/logstash/filters/elasticsearch.rb b/lib/logstash/filters/elasticsearch.rb index fbc17d6..ce133a5 100644 --- a/lib/logstash/filters/elasticsearch.rb +++ b/lib/logstash/filters/elasticsearch.rb @@ -188,7 +188,7 @@ def self.validate_value(value, validator) def register case @query_type when "esql" - invalid_params_with_esql = original_params.keys & %w(index query_template sort docinfo_fields aggregation_fields enable_sort result_size) + invalid_params_with_esql = original_params.keys & %w(index query_template sort fields docinfo_fields aggregation_fields enable_sort result_size) raise LogStash::ConfigurationError, "Configured #{invalid_params_with_esql} params cannot be used with ES|QL query" if invalid_params_with_esql.any? validate_ls_version_for_esql_support!
diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index 304c017..e08b699 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -5,31 +5,28 @@ module Filters class Elasticsearch class EsqlExecutor + ESQL_PARSERS_BY_TYPE = Hash.new(lambda { |x| x }).merge( + 'date' => ->(value) { value && LogStash::Timestamp.new(value) }, + ) + def initialize(plugin, logger) @logger = logger @event_decorator = plugin.method(:decorate) @query = plugin.params["query"] - unless @query.match?(/\|\s*LIMIT/) - @logger.warn("ES|QL query doesn't contain LIMIT, adding `| LIMIT 1` to optimize the performance") - @query.concat(' | LIMIT 1') - end query_params = plugin.query_params || {} reference_valued_params, static_valued_params = query_params.partition { |_, v| v.kind_of?(String) && v.match?(/^\[.*\]$/) } @referenced_params = reference_valued_params&.to_h # keep static params as an array of hashes to attach to the ES|QL api param easily @static_params = static_valued_params.map { |k, v| { k => v } } - @fields = plugin.params["fields"] @tag_on_failure = plugin.params["tag_on_failure"] @logger.debug("ES|QL query executor initialized with ", query: @query, query_params: query_params) + # if the target is specified, all result entries will be copied to the target field + # otherwise, the first value of the result will be copied to the event @target_field = plugin.params["target"] - if @target_field - def self.apply_target(path); "[#{@target_field}][#{path}]"; end - else - def self.apply_target(path); path; end - end + @logger.warn("Only first query result will be copied to the event. Please specify `target` in plugin config to include all") if @target_field.nil? 
end def process(client, event) @@ -73,15 +70,43 @@ def process_response(event, response) return end - # this shouldn't happen but just in case not crash the plugin + # this shouldn't happen but just in case to avoid crashes the plugin if columns.nil? || columns.size == 0 @logger.error("No columns exist but received values", columns: columns, values: values) return end event.set("[@metadata][total_values]", values.size) - @logger.debug("Executing ES|QL values size ", values.size) - add_requested_fields(event, columns, values) + @logger.debug("ES|QL query result values size ", size: values.size) + + column_specs = columns.map { |column| ColumnSpec.new(column) } + sub_element_mark_map = mark_sub_elements(column_specs) + multi_fields = sub_element_mark_map.filter_map { |key, val| key.name if val == true } + + @logger.debug("Multi-fields found in ES|QL result and they will not be available in the event. Please use `RENAME` command if you want to include them.", { :detected_multi_fields => multi_fields }) if multi_fields.any? + + if @target_field + values_to_set = values.map do |row| + mapped_data = column_specs.each_with_index.with_object({}) do |(column, index), mapped_data| + # `unless value.nil?` is a part of `drop_null_columns` that if some of the columns' values are not `nil`, `nil` values appear, + # we should continuously filter them out to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result) + # we also exclude sub-elements of the base field + if row && sub_element_mark_map[column] == false + value_to_set = ESQL_PARSERS_BY_TYPE[column.type].call(row[index]) + mapped_data[column.name] = value_to_set + end + end + generate_nested_structure(mapped_data) unless mapped_data.empty? 
+ end + event.set("[#{@target_field}]", values_to_set) + else + column_specs.zip(values.first).each do |(column, value) | + if value && sub_element_mark_map[column] == false + value_to_set = ESQL_PARSERS_BY_TYPE[column.type].call(value) + event.set(column.field_reference, value_to_set) + end + end + end end def inform_warning(response) @@ -89,26 +114,63 @@ def inform_warning(response) @logger.warn("ES|QL executor received warning", { message: warning }) end - def add_requested_fields(event, columns, values) - @fields.each do |old_key, new_key| - column_index = columns.find_index { |col| col['name'] == old_key } - next unless column_index + # Transforms dotted keys to nested JSON shape + # @param dot_keyed_hash [Hash] whose keys are dotted (example 'a.b.c.d': 'val') + # @return [Hash] whose keys are nested with value mapped ({'a':{'b':{'c':{'d':'val'}}}}) + def generate_nested_structure(dot_keyed_hash) + dot_keyed_hash.each_with_object({}) do |(key, value), result| + key_parts = key.to_s.split('.') + *path, leaf = key_parts + leaf_scope = path.inject(result) { |scope, part| scope[part] ||= {} } + leaf_scope[leaf] = value + end + end - row_values = values.map { |entry| entry[column_index] }&.compact - value_to_set = row_values.count > 1 ? row_values : row_values.first - set_to_event_target(event, new_key, value_to_set) unless value_to_set.nil? + # Determines whether each column in a collection is a nested sub-element (e.g "user.age") + # of another column in the same collection (e.g "user"). + # + # @param columns [Array] An array of objects with a `name` attribute representing field paths. + # @return [Hash] A hash mapping each column to `true` if it is a sub-element of another field, `false` otherwise. 
+ # Time complexity: (O(NlogN+N*K)) where K is the number of conflict depth + # without (`prefix_set`) memoization, it would be O(N^2) + def mark_sub_elements(columns) + # Sort columns by name length (ascending) + sorted_columns = columns.sort_by { |c| c.name.length } + prefix_set = Set.new # memoization set + + sorted_columns.each_with_object({}) do |column, memo| + # Split the column name into parts (e.g., "user.profile.age" → ["user", "profile", "age"]) + parts = column.name.split('.') + + # Generate all possible parent prefixes (e.g., "user", "user.profile") + # and check if any parent prefix exists in the set + parent_prefixes = (0...parts.size - 1).map { |i| parts[0..i].join('.') } + memo[column] = parent_prefixes.any? { |prefix| prefix_set.include?(prefix) } + prefix_set.add(column.name) end end + end + + # Class representing a column specification in the ESQL response['columns'] + # The class's main purpose is to provide a structure for the event key + # columns is an array with `name` and `type` pair (example: `{"name"=>"@timestamp", "type"=>"date"}`) + # @attr_reader :name [String] The name of the column + # @attr_reader :type [String] The type of the column + class ColumnSpec + attr_reader :name, :type + + def initialize(spec) + @name = isolate(spec.fetch('name')) + @type = isolate(spec.fetch('type')) + end - # if @target is defined, creates a nested structure to inject a result into the target field - # if not defined, directly sets to the top-level event field - # @param event [LogStash::Event] - # @param new_key [String] name of the field to set - # @param value_to_set [Array] values to set - # @return [void] - def set_to_event_target(event, new_key, value_to_set) - key_to_set = self.apply_target(new_key) - event.set(key_to_set, value_to_set) + def field_reference + @_field_reference ||= '[' + name.gsub('.', '][') + ']' + end + + private + def isolate(value) + value.frozen? ? 
value : value.clone.freeze end end end diff --git a/logstash-filter-elasticsearch.gemspec b/logstash-filter-elasticsearch.gemspec index 273a820..214b130 100644 --- a/logstash-filter-elasticsearch.gemspec +++ b/logstash-filter-elasticsearch.gemspec @@ -1,7 +1,7 @@ Gem::Specification.new do |s| s.name = 'logstash-filter-elasticsearch' - s.version = '4.2.0' + s.version = '4.3.0' s.licenses = ['Apache License (2.0)'] s.summary = "Copies fields from previous log events in Elasticsearch to current events " s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index 2ce00e2..1768726 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -17,10 +17,10 @@ context "when initializes" do it "sets up the ESQL executor with correct parameters" do allow(logger).to receive(:debug) + allow(logger).to receive(:warn) expect(esql_executor.instance_variable_get(:@query)).to eq(plugin_config["query"]) expect(esql_executor.instance_variable_get(:@referenced_params)).to eq({}) expect(esql_executor.instance_variable_get(:@static_params)).to eq([]) - expect(esql_executor.instance_variable_get(:@fields)).to eq({}) expect(esql_executor.instance_variable_get(:@tag_on_failure)).to eq(["_elasticsearch_lookup_failure"]) end end @@ -30,17 +30,21 @@ super() .merge( { - "query_type" => "esql", "query" => "FROM my-index | WHERE field = ?foo | LIMIT 5", - "query_params" => { "foo" => "[bar]" }, - "fields" => { "val" => "val_new", "odd" => "new_odd" } + "query_params" => { "foo" => "[bar]" } }) } let(:event) { LogStash::Event.new({}) } - let(:response) { { 'values' => [["foo", "bar", nil]], 'columns' => [{ 'name' => 'id' }, { 'name' => 'val' }, { 'name' => 'odd' }] } } + let(:response) { + { + 'values' => [["foo", 
"bar", nil]], + 'columns' => [{ 'name' => 'id', 'type' => 'keyword' }, { 'name' => 'val', 'type' => 'keyword' }, { 'name' => 'odd', 'type' => 'keyword' }] + } + } before do allow(logger).to receive(:debug) + allow(logger).to receive(:warn) end it "resolves parameters" do @@ -66,7 +70,9 @@ it "processes the response and adds metadata" do expect(event).to receive(:set).with("[@metadata][total_values]", 1) - expect(event).to receive(:set).with("val_new", "bar") + # [id], [val] aren't resolved via sprintf, use as it is + expect(event).to receive(:set).with("[id]", "foo") + expect(event).to receive(:set).with("[val]", "bar") esql_executor.send(:process_response, event, response) end @@ -85,8 +91,8 @@ expect { esql_executor.process(client, event) }.to_not raise_error expect(event.get("[@metadata][total_values]")).to eq(1) expect(event.get("hello")).to eq("world") - expect(event.get("val_new")).to eq("bar") - expect(event.get("new_odd")).to be_nil # filters out non-exist fields + expect(event.get("val")).to eq("bar") + expect(event.get("odd")).to be_nil # filters out non-exist fields end it "tags on plugin failures" do @@ -105,11 +111,68 @@ expect(event).to receive(:tag).with("_elasticsearch_lookup_failure") esql_executor.process(client, event) end + + describe "#target" do + let(:event) { LogStash::Event.new({ "hello" => "world", "bar" => "resolve_me" }) } + let(:response) { + super().merge({ 'values' => [["foo", "bar", nil], %w[hello again world], %w[another value here]] }) + } + before(:each) do + expect(client).to receive(:esql_query).with(any_args).and_return(response) + allow(plugin).to receive(:decorate) + allow(logger).to receive(:debug) + allow(response).to receive(:headers).and_return({}) + end + + context "when specified" do + let(:plugin_config) { + super().merge({ "target" => "my-target" }) + } + + it "sets all query results into event" do + expected_result = [ + {"id"=>"foo", "val"=>"bar", "odd"=>nil}, + {"id"=>"hello", "val"=>"again", "odd"=>"world"}, + 
{"id"=>"another", "val"=>"value", "odd"=>"here"} + ] + expect { esql_executor.process(client, event) }.to_not raise_error + expect(event.get("[@metadata][total_values]")).to eq(3) + expect(event.get("my-target").size).to eq(3) + expect(event.get("my-target")).to eq(expected_result) + end + end + + context "when not specified" do + shared_examples "first result into the event" do + it "sets" do + expect { esql_executor.process(client, event) }.to_not raise_error + expect(event.get("[@metadata][total_values]")).to eq(3) + expect(event.get("id")).to eq("foo") + expect(event.get("val")).to eq("bar") + expect(event.get("odd")).to eq(nil) + end + end + context "when limit is included in the query" do + let(:plugin_config) { + super().merge({ "query" => "FROM my-index | LIMIT 555" }) + } + it_behaves_like "first result into the event" + end + + context "when limit is not included in the query" do + let(:plugin_config) { + super().merge({ "query" => "FROM my-index" }) + } + it_behaves_like "first result into the event" + end + end + end end describe "#query placeholders" do before(:each) do allow(logger).to receive(:debug) + allow(logger).to receive(:warn) plugin.send(:validate_esql_query_and_params!) end From 4a6c97a0c97b2ef43138dc30b3d3524331219b83 Mon Sep 17 00:00:00 2001 From: Mashhur <99575341+mashhurs@users.noreply.github.com> Date: Thu, 3 Jul 2025 10:30:22 -0700 Subject: [PATCH 17/21] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doc corrections. Co-authored-by: João Duarte --- docs/index.asciidoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index a4aea43..8a2472a 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -143,12 +143,11 @@ The following is a basic {esql} query that sets the food name to transaction eve api_key => '....' 
query => ' FROM food-index - | WHERE id = "?food_id" + | WHERE id == ?food_id ' query_params => { "food_id" => "[food][id]" } - fields => { "food.name" => "food_name" } } } From 9cf7135946629409438ab612eadb7855486aecd5 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 3 Jul 2025 10:32:04 -0700 Subject: [PATCH 18/21] ES|QL result mapping to event doc correction. --- docs/index.asciidoc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/index.asciidoc b/docs/index.asciidoc index 8a2472a..8274446 100644 --- a/docs/index.asciidoc +++ b/docs/index.asciidoc @@ -169,7 +169,8 @@ For example, a query might produce a table like: |2025-04-10T12:05:00 |456 |purchase |403 | Forbidden (unauthorized user) |=== -For this case, the plugin creates two JSON look like objects as following: +For this case, the plugin creates two JSON look like objects as below and places them into the `target` field of the event if `target` is defined. +If `target` is not defined, the plugin places the _only_ first result at the root of the event. [source, json] [ { @@ -194,9 +195,6 @@ For this case, the plugin creates two JSON look like objects as following: NOTE: If your index has a mapping with sub-objects where `status.code` and `status.desc` actually dotted fields, they appear in {ls} events as a nested structure. -The plugin uses the `target` field to determine where to place the results. -If `target` is not defined, the plugin places the _only_ first result at the root of the event. - [id="plugins-{type}s-{plugin}-esql-multifields"] ===== Conflict on multi-fields From 953a385ccde0d2223a4d61a2efda9a22cbecd339 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 3 Jul 2025 12:54:15 -0700 Subject: [PATCH 19/21] A bugfix of when setting null values and integrations tests updated which removes non-allowed the fields param. 
--- .../filters/elasticsearch/esql_executor.rb | 2 +- .../integration/elasticsearch_esql_spec.rb | 35 ++++++++++--------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/lib/logstash/filters/elasticsearch/esql_executor.rb b/lib/logstash/filters/elasticsearch/esql_executor.rb index e08b699..82ca47b 100644 --- a/lib/logstash/filters/elasticsearch/esql_executor.rb +++ b/lib/logstash/filters/elasticsearch/esql_executor.rb @@ -91,7 +91,7 @@ def process_response(event, response) # `unless value.nil?` is a part of `drop_null_columns` that if some of the columns' values are not `nil`, `nil` values appear, # we should continuously filter them out to achieve full `drop_null_columns` on each individual row (ideal `LIMIT 1` result) # we also exclude sub-elements of the base field - if row && sub_element_mark_map[column] == false + if row[index] && sub_element_mark_map[column] == false value_to_set = ESQL_PARSERS_BY_TYPE[column.type].call(row[index]) mapped_data[column.name] = value_to_set end diff --git a/spec/filters/integration/elasticsearch_esql_spec.rb b/spec/filters/integration/elasticsearch_esql_spec.rb index b8eafea..c2154ed 100644 --- a/spec/filters/integration/elasticsearch_esql_spec.rb +++ b/spec/filters/integration/elasticsearch_esql_spec.rb @@ -72,20 +72,12 @@ it "processes the event" do plugin.filter(event) expect(event.get("[@metadata][total_values]")).to eq(expected_count) - fields&.each do | old_key, new_key | - expect(event.get(new_key)).not_to be(nil) + fields&.each do | field | + expect(event.get(field)).not_to be(nil) end end end - describe "LIMIT 1 by default" do - let(:config) do - super().merge("query" => "FROM #{es_index}") - end - - include_examples "ESQL query execution", 1 - end - describe "with simple FROM query with LIMIT" do let(:config) do super().merge("query" => "FROM #{es_index} | LIMIT 99") @@ -102,6 +94,14 @@ include_examples "ESQL query execution", 2 end + describe "with query params" do + let(:config) do + 
super().merge("query" => "FROM #{es_index} | WHERE type==?type", "query_params" => { "type" => "b" }) + end + + include_examples "ESQL query execution", 2 + end + describe "when invalid query used" do let(:config) do super().merge("query" => "FROM undefined index | LIMIT 1") @@ -114,24 +114,27 @@ end describe "when field enrichment requested" do - fields = {"message" => "target_message", "count" => "target_count"} let(:config) do - super().merge("query" => "FROM #{es_index} | WHERE type==\"b\" | LIMIT 99", "fields" => fields) + super().merge("query" => "FROM #{es_index} | WHERE type==\"b\" | LIMIT 99") end - include_examples "ESQL query execution", 2, fields + include_examples "ESQL query execution", 2, %w[message count] end describe "when non-exist field value appear" do let(:config) do - super().merge("query" => "FROM #{es_index} | LIMIT 99", "fields" => {"message" => "target_message", "count" => "target_count"}) + super().merge("query" => "FROM #{es_index}", "target" => "target_field") end it "processes the event" do plugin.filter(event) expect(event.get("[@metadata][total_values]")).to eq(6) - expect(event.get("target_message").size).to eq(6) - expect(event.get("target_count").size).to eq(5) + expect(event.get("target_field").size).to eq(6) + values = event.get("target_field") + counts = values.count { |entry| entry.key?("count") } + messages = values.count { |entry| entry.key?("message") } + expect(counts).to eq(5) + expect(messages).to eq(6) end end end From 8bf045f97a3a05ea777406977455ef861726f512 Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 3 Jul 2025 12:57:47 -0700 Subject: [PATCH 20/21] Failed unit test fixed. 
--- spec/filters/elasticsearch_esql_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/filters/elasticsearch_esql_spec.rb b/spec/filters/elasticsearch_esql_spec.rb index 1768726..ff102b4 100644 --- a/spec/filters/elasticsearch_esql_spec.rb +++ b/spec/filters/elasticsearch_esql_spec.rb @@ -131,7 +131,7 @@ it "sets all query results into event" do expected_result = [ - {"id"=>"foo", "val"=>"bar", "odd"=>nil}, + {"id"=>"foo", "val"=>"bar"}, {"id"=>"hello", "val"=>"again", "odd"=>"world"}, {"id"=>"another", "val"=>"value", "odd"=>"here"} ] From 2b445a22d4d472660d4d28450bc823a70228c39e Mon Sep 17 00:00:00 2001 From: Mashhur Date: Thu, 3 Jul 2025 16:29:05 -0700 Subject: [PATCH 21/21] Integration tests to run with credentials anebled and SSL configs. --- .../integration/elasticsearch_esql_spec.rb | 42 +++++++++++++++---- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/spec/filters/integration/elasticsearch_esql_spec.rb b/spec/filters/integration/elasticsearch_esql_spec.rb index c2154ed..32da05f 100644 --- a/spec/filters/integration/elasticsearch_esql_spec.rb +++ b/spec/filters/integration/elasticsearch_esql_spec.rb @@ -6,8 +6,10 @@ describe LogStash::Filters::Elasticsearch, integration: true do + ELASTIC_SECURITY_ENABLED = ENV['ELASTIC_SECURITY_ENABLED'].eql? 'true' SECURE_INTEGRATION = ENV['SECURE_INTEGRATION'].eql? 'true' ES_HOSTS = ["http#{SECURE_INTEGRATION ? 
's' : nil}://#{ESHelper.get_host_port}"] + CA_PATH = File.expand_path('../fixtures/test_certs/ca.crt', File.dirname(__FILE__)) let(:plugin) { described_class.new(config) } let(:es_index) { "es-filter-plugin-esql-integration-#{rand(1000)}" } @@ -21,25 +23,49 @@ { "message" => "odd test message", "type" => "t" } ] end - let(:config) do + + let(:base_config) do { + "query_type" => "esql", "hosts" => ES_HOSTS, - "query_type" => "esql" + "ssl_enabled" => SECURE_INTEGRATION } end + + let(:credentials) do + if SECURE_INTEGRATION + { 'user' => 'tests', 'password' => 'Tests123' } + else + { 'user' => 'elastic', 'password' => ENV['ELASTIC_PASSWORD'] } + end + end + + let(:config) do + config = ELASTIC_SECURITY_ENABLED ? base_config.merge(credentials) : base_config + config = { 'ssl_certificate_authorities' => CA_PATH }.merge(config) if SECURE_INTEGRATION + config + end + let(:event) { LogStash::Event.new({}) } - let(:es_client) do - Elasticsearch::Client.new(hosts: ES_HOSTS) + + def es_client + @es_client ||= begin + user = SECURE_INTEGRATION ? 'tests' : 'elastic' + password = SECURE_INTEGRATION ? 'Tests123' : ENV['ELASTIC_PASSWORD'] + + es_client_config = { hosts: ES_HOSTS } + es_client_config = es_client_config.merge({ user: user, password: password }) if ELASTIC_SECURITY_ENABLED || SECURE_INTEGRATION + es_client_config = es_client_config.merge({ transport_options: { ssl: { ca_path: CA_PATH, verify: false }}}) if SECURE_INTEGRATION + + Elasticsearch::Client.new(es_client_config) + end end before(:all) do is_ls_with_esql_supported_client = Gem::Version.create(LOGSTASH_VERSION) >= Gem::Version.create(LogStash::Filters::Elasticsearch::LS_ESQL_SUPPORT_VERSION) + # Skip tests if an ES version doesn't support ES|QL skip "LS version does not have ES client which supports ES|QL" unless is_ls_with_esql_supported_client - # Skip tests if an ES version doesn't support ES||QL - es_client = SECURE_INTEGRATION ? 
- Elasticsearch::Client.new(hosts: ES_HOSTS, user: 'tests', password: 'Tests123') : - Elasticsearch::Client.new(hosts: ES_HOSTS) es_version_info = es_client.info["version"] es_gem_version = Gem::Version.create(es_version_info["number"]) skip "ES version does not support ES|QL" if es_gem_version.nil? || es_gem_version < Gem::Version.create(LogStash::Filters::Elasticsearch::ES_ESQL_SUPPORT_VERSION)