From 7edc969de8aad66f36718d97b27deddd1d297dd3 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 11 Apr 2024 15:56:03 +0200 Subject: [PATCH 01/28] New timeseries resource: spec, crud/add, crud/retrieve --- code/src/sixsq/nuvla/db/binding.clj | 16 +- code/src/sixsq/nuvla/db/es/binding.clj | 70 ++++++--- code/src/sixsq/nuvla/db/impl.clj | 5 + .../server/resources/spec/timeseries.cljc | 51 ++++++ .../nuvla/server/resources/timeseries.clj | 145 ++++++++++++++++++ .../resources/spec/timeseries_test.cljc | 29 ++++ .../resources/timeseries_lifecycle_test.clj | 52 +++++++ 7 files changed, 349 insertions(+), 19 deletions(-) create mode 100644 code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc create mode 100644 code/src/sixsq/nuvla/server/resources/timeseries.clj create mode 100644 code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc create mode 100644 code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj diff --git a/code/src/sixsq/nuvla/db/binding.clj b/code/src/sixsq/nuvla/db/binding.clj index 7be208d53..d89cc3f72 100644 --- a/code/src/sixsq/nuvla/db/binding.clj +++ b/code/src/sixsq/nuvla/db/binding.clj @@ -154,4 +154,18 @@ On failure, the function must throw an ex-info containing the error ring response. If the resource-id does not correspond to a Collection, then a 400 (bad-request) response must be returned. Other appropriate - error codes can also be thrown.")) + error codes can also be thrown.") + + (create-timeseries + [this timeseries-id options] + "This function creates the given timeseries in the database.") + + (retrieve-timeseries + [this timeseries-id] + "This function retrieves the identified timeseries from the database. + + On success, this returns the clojure map representation of the + timeseries. The response must not be embedded in a ring response. + + On failure, this function must throw an ex-info containing the error + ring response. If the resource doesn't exist, use a 404 status.")) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 78d8258c4..119bfaf5c 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -378,7 +378,7 @@ (log/error "unexpected status code when creating/updating" policy-name "ILM policy (" status "). " (or error e))))))) (defn create-timeseries-template - [client index mapping {:keys [routing-path look-back-time look-ahead-time start-time lifecycle-name]}] + [client index mappings {:keys [routing-path look-back-time look-ahead-time start-time lifecycle-name]}] (let [template-name (str index "-template")] (try (let [{:keys [status]} (spandex/request client @@ -401,7 +401,7 @@ look-back-time (assoc :index.look_back_time look-back-time) start-time (assoc :index.time_series.start_time start-time) lifecycle-name (assoc :index.lifecycle.name lifecycle-name)) - :mappings mapping}}})] + :mappings mappings}}})] (if (= 200 status) (do (log/debug template-name "index template created/updated") template-name) @@ -433,27 +433,55 @@ error (:error body)] (log/error "unexpected status code when creating" datastream-index-name "datastream (" status "). " (or error e))))))))) -(defn initialize-timeserie-datastream - [client collection-id {:keys [spec ilm-policy look-back-time look-ahead-time start-time] - :or {ilm-policy hot-warm-cold-delete-policy - look-back-time "7d"} - :as _options}] - (let [index (escu/collection-id->index collection-id) - mapping (mapping/mapping spec {:dynamic-templates false, :fulltext false}) - routing-path (mapping/time-series-routing-path spec) - ilm-policy-name (create-or-update-lifecycle-policy client index ilm-policy)] - (create-timeseries-template client index mapping {:routing-path routing-path - :lifecycle-name ilm-policy-name - :look-ahead-time look-ahead-time - :look-back-time look-back-time - :start-time start-time}) - (create-datastream client index))) +(defn create-timeseries-impl + [client timeseries-id + {:keys [mappings + routing-path + ilm-policy + look-back-time + look-ahead-time + start-time] + :or {ilm-policy hot-warm-cold-delete-policy + look-back-time "7d"} + :as _options}] + (let [ilm-policy-name (create-or-update-lifecycle-policy client timeseries-id ilm-policy)] + (create-timeseries-template client timeseries-id mappings + {:routing-path routing-path + :lifecycle-name ilm-policy-name + :look-ahead-time look-ahead-time + :look-back-time look-back-time + :start-time start-time}) + (create-datastream client timeseries-id))) + +(defn retrieve-timeseries-impl + [client timeseries-id] + (try + (let [response (spandex/request client {:url [:_data_stream timeseries-id], :method :get}) + found? (seq (get-in response [:body :data_streams]))] + (if found? + (:body response) + (throw (r/ex-not-found timeseries-id)))) + (catch Exception e + (let [{:keys [status] :as _response} (ex-data e)] + (if (= 404 status) + (throw (r/ex-not-found timeseries-id)) + (throw e)))))) + +(defn initialize-collection-timeseries + [client collection-id {:keys [spec] :as options}] + (let [timeseries-id (escu/collection-id->index collection-id) + mappings (mapping/mapping spec {:dynamic-templates false, :fulltext false}) + routing-path (mapping/time-series-routing-path spec)] + (create-timeseries-impl client timeseries-id + (assoc options + :mappings mappings + :routing-path routing-path)))) (defn initialize-db [client collection-id {:keys [spec timeseries] :as options}] (let [index (escu/collection-id->index collection-id)] (if timeseries - (initialize-timeserie-datastream client collection-id options) + (initialize-collection-timeseries client collection-id options) (let [mapping (mapping/mapping spec)] (create-index client index) (set-index-mapping client index mapping))))) @@ -501,6 +529,12 @@ (bulk-edit [_ collection-id options] (bulk-edit-data client collection-id options)) + (create-timeseries [_ timeseries-id options] + (create-timeseries-impl client timeseries-id options)) + + (retrieve-timeseries [_ timeseries-id] + (retrieve-timeseries-impl client timeseries-id)) + Closeable (close [_] (when sniffer diff --git a/code/src/sixsq/nuvla/db/impl.clj b/code/src/sixsq/nuvla/db/impl.clj index 37b3cc786..c817066ed 100644 --- a/code/src/sixsq/nuvla/db/impl.clj +++ b/code/src/sixsq/nuvla/db/impl.clj @@ -57,6 +57,11 @@ (defn bulk-edit [collection-id & [options]] (p/bulk-edit *impl* collection-id options)) +(defn create-timeseries [timeseries-id & [options]] + (p/create-timeseries *impl* timeseries-id options)) + +(defn retrieve-timeseries [timeseries-id] + (p/retrieve-timeseries *impl* timeseries-id)) (defn close [] (when-let [^Closeable impl *impl*] diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc new file mode 100644 index 000000000..d97d59e9b --- /dev/null +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -0,0 +1,51 @@ +(ns sixsq.nuvla.server.resources.spec.timeseries + (:require + [clojure.spec.alpha :as s] + [sixsq.nuvla.server.resources.spec.common :as common] + [sixsq.nuvla.server.resources.spec.core :as core] + [sixsq.nuvla.server.util.spec :as su] + [spec-tools.core :as st])) + +(def field-types #{"keyword" "long" "double"}) + +(s/def ::field-name + (assoc (st/spec ::core/nonblank-string) + :json-schema/description "Timeseries field name")) + +(s/def ::field-type + (assoc (st/spec field-types) + :json-schema/description "Timeseries field name")) + +(s/def ::dimension + (assoc (st/spec (su/only-keys + :req-un [::field-name + ::field-type])) + :json-schema/type "map" + :json-schema/description "Timeseries dimension")) + +(s/def ::dimensions + (-> (st/spec (s/coll-of ::dimension :kind vector? :distinct true)) + (assoc :json-schema/description "Timeseries dimensions"))) + +(def metric-types #{"gauge" "counter"}) + +(s/def ::metric-type + (assoc (st/spec metric-types) + :json-schema/description "Timeseries metric type")) + +(s/def ::metric + (assoc (st/spec (su/only-keys + :req-un [::field-name + ::field-type + ::metric-type])) + :json-schema/type "map" + :json-schema/description "Timeseries metric")) + +(s/def ::metrics + (-> (st/spec (s/coll-of ::metric :kind vector? :distinct true)) + (assoc :json-schema/description "Timeseries metrics"))) + +(s/def ::schema + (su/only-keys-maps common/common-attrs + {:req-un [::dimensions ::metrics]})) + diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj new file mode 100644 index 000000000..1a13f8fa4 --- /dev/null +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -0,0 +1,145 @@ +(ns sixsq.nuvla.server.resources.timeseries + " +The `timeseries` resources represent a timeseries. +" + (:require + [sixsq.nuvla.auth.acl-resource :as a] + [sixsq.nuvla.db.impl :as db] + [sixsq.nuvla.server.resources.common.crud :as crud] + [sixsq.nuvla.server.resources.common.std-crud :as std-crud] + [sixsq.nuvla.server.resources.common.utils :as u] + [sixsq.nuvla.server.resources.spec.timeseries :as timeseries])) + + +(def ^:const resource-type (u/ns->type *ns*)) + + +(def ^:const collection-type (u/ns->collection-type *ns*)) + + +(def collection-acl {:query ["group/nuvla-admin"] + :add ["group/nuvla-admin"]}) + + +;; +;; "Implementations" of multimethod declared in crud namespace +;; + + +(def validate-fn (u/create-spec-validation-fn ::timeseries/schema)) + +(defn validate + [resource] + (validate-fn resource)) + +(defmethod crud/validate resource-type + [resource] + (validate resource)) + +(defn validate-metrics + [metrics] + (doseq [metric metrics] + (validate metric))) + +;; +;; use default ACL method +;; + +(defmethod crud/add-acl resource-type + [resource request] + (a/add-acl (dissoc resource :acl) request)) + +(def add-impl (std-crud/add-fn resource-type collection-acl resource-type + :validate-fn validate)) + +(defn resource-id->timeseries-id + [resource-id] + (str "ts-" (u/id->uuid resource-id))) + +(defn dimension->es-property + [{:keys [field-name field-type]}] + [field-name {:type field-type + :time_series_dimension true}]) + +(defn metric->es-property + [{:keys [field-name field-type metric-type]}] + [field-name {:type field-type + :time_series_metric metric-type}]) + +(defn ts-resource->mappings + [{:keys [dimensions metrics]}] + {:properties + (into {"@timestamp" {:type "date" + :format "strict_date_optional_time||epoch_millis"}} + (concat + (map dimension->es-property dimensions) + (map metric->es-property metrics)))}) + +(defn ts-resource->routing-path + [{:keys [dimensions]}] + (mapv :field-name dimensions)) + +(defn create-timeseries + [resource-id] + (let [resource (crud/retrieve-by-id-as-admin resource-id) + mappings (ts-resource->mappings resource) + routing-path (ts-resource->routing-path resource)] + (db/create-timeseries + (resource-id->timeseries-id resource-id) + {:mappings mappings + :routing-path routing-path}))) + +(defmethod crud/add resource-type + [request] + (let [{{:keys [resource-id]} :body :as response} (add-impl request)] + (create-timeseries resource-id) + response)) + + +(def retrieve-impl (std-crud/retrieve-fn resource-type)) + + +(defmethod crud/retrieve resource-type + [request] + (retrieve-impl request)) + + +(def delete-impl (std-crud/delete-fn resource-type)) + + +(defmethod crud/delete resource-type + [request] + (delete-impl request)) + + +;; +;; available operations +;; + +(defmethod crud/set-operations resource-type + [resource _request] + resource) + + +;; +;; collection +;; + +(def query-impl (std-crud/query-fn resource-type collection-acl collection-type)) + + +(defmethod crud/query resource-type + [request] + (query-impl request)) + + +(def bulk-insert-impl (std-crud/bulk-insert-metrics-fn resource-type collection-acl collection-type)) + +(defmethod crud/bulk-action [resource-type "bulk-insert"] + [request] + (validate-metrics (:body request)) + (bulk-insert-impl request)) + +(defn initialize + [] + (std-crud/initialize resource-type ::timeseries/schema)) diff --git a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc new file mode 100644 index 000000000..62b7ccc72 --- /dev/null +++ b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc @@ -0,0 +1,29 @@ +(ns sixsq.nuvla.server.resources.spec.timeseries-test + (:require + [clojure.test :refer [deftest]] + [sixsq.nuvla.server.resources.timeseries :as t] + [sixsq.nuvla.server.resources.spec.spec-test-utils :as stu] + [sixsq.nuvla.server.resources.spec.timeseries :as timeseries] + [sixsq.nuvla.server.util.time :as time])) + +(def valid-acl {:owners ["group/nuvla-admin"] + :edit-acl ["group/nuvla-admin"]}) + +(deftest check-schema + (let [timestamp (time/now-str) + valid-entry {:id (str t/resource-type "/internal") + :resource-type t/resource-type + :created timestamp + :updated timestamp + :acl valid-acl + :dimensions [{:field-name "test-dimension" + :field-type "keyword"}] + :metrics [{:field-name "test-metric" + :field-type "long" + :metric-type "gauge"}]}] + + (stu/is-valid ::timeseries/schema valid-entry) + (stu/is-invalid ::timeseries/schema (assoc valid-entry :unknown "value")) + + (doseq [attr #{:metrics :dimensions}] + (stu/is-invalid ::timeseries/schema (dissoc valid-entry attr))))) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj new file mode 100644 index 000000000..7a5177315 --- /dev/null +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -0,0 +1,52 @@ +(ns sixsq.nuvla.server.resources.timeseries-lifecycle-test + (:require + [clojure.data.json :as json] + [clojure.test :refer [deftest is testing use-fixtures]] + [peridot.core :refer [content-type header request session]] + [sixsq.nuvla.db.impl :as db] + [sixsq.nuvla.server.app.params :as p] + [sixsq.nuvla.server.middleware.authn-info :refer [authn-info-header]] + [sixsq.nuvla.server.resources.common.utils :as u] + [sixsq.nuvla.server.resources.lifecycle-test-utils :as ltu] + [sixsq.nuvla.server.resources.timeseries :as t])) + +(use-fixtures :each ltu/with-test-server-fixture) + +(def base-uri (str p/service-context t/resource-type)) + + +(deftest insert + (let [session-anon (-> (ltu/ring-app) + session + (content-type "application/json")) + session-admin (header session-anon authn-info-header + "group/nuvla-admin group/nuvla-admin group/nuvla-user group/nuvla-anon") + entry {:dimensions [{:field-name "test-dimension" + :field-type "keyword"}] + :metrics [{:field-name "test-metric" + :field-type "long" + :metric-type "gauge"}]} + ts-id (-> session-admin + (request base-uri + :request-method :post + :body (json/write-str entry)) + (ltu/body->edn) + (ltu/is-status 201) + (ltu/location)) + ts-url (str p/service-context ts-id) + ts-resource (-> session-admin + (request ts-url) + (ltu/body->edn) + (ltu/is-status 200) + (ltu/body)) + ts (db/retrieve-timeseries (t/resource-id->timeseries-id ts-id))] + (is (= (assoc entry + :id ts-id + :resource-type "timeseries") + (select-keys ts-resource [:resource-type :id :dimensions :metrics]))) + (is (pos? (count (:data_streams ts)))))) + +(deftest bad-methods + (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] + (ltu/verify-405-status [[resource-uri :put] + [resource-uri :post]]))) From 017df55eaa6e2deeb80d799f298baead1c2091f8 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 16 Apr 2024 16:39:15 +0200 Subject: [PATCH 02/28] Insert and bulk insert actions on timeseries --- code/src/sixsq/nuvla/db/binding.clj | 57 ++++--- code/src/sixsq/nuvla/db/es/binding.clj | 40 ++--- code/src/sixsq/nuvla/db/impl.clj | 16 +- .../server/resources/common/std_crud.clj | 12 +- .../server/resources/spec/timeseries.cljc | 11 +- .../nuvla/server/resources/timeseries.clj | 108 ++++++------- .../server/resources/timeseries/utils.clj | 118 ++++++++++++++ .../resources/ts_nuvlaedge_availability.clj | 6 +- .../resources/ts_nuvlaedge_telemetry.clj | 13 +- .../resources/spec/timeseries_test.cljc | 6 +- .../resources/timeseries_lifecycle_test.clj | 147 ++++++++++++++++-- 11 files changed, 394 insertions(+), 140 deletions(-) create mode 100644 code/src/sixsq/nuvla/server/resources/timeseries/utils.clj diff --git a/code/src/sixsq/nuvla/db/binding.clj b/code/src/sixsq/nuvla/db/binding.clj index d89cc3f72..95121a29b 100644 --- a/code/src/sixsq/nuvla/db/binding.clj +++ b/code/src/sixsq/nuvla/db/binding.clj @@ -105,31 +105,6 @@ then a 400 (bad-request) response must be returned. Other appropriate error codes can also be thrown.") - (add-metric - [this collection-id data options] - "This function adds the given metric to the database. The metric - must not already exist in the database. - - On success, the function must return a 201 ring response with the - relative URL of the new metric as the Location. - - On failure, the function must throw an ex-info containing the error - ring response. The error must be 409 (conflict) if the metric - exists already. Other appropriate error codes can also be thrown.") - - (bulk-insert-metrics - [this collection-id data options] - "This function insert the given metrics in the database where the - collection-id corresponds to the name of a metrics Collection. - - On success, the function must return the summary map of what was done - on the db. - - On failure, the function must throw an ex-info containing the error - ring response. If the resource-id does not correspond to a Collection, - then a 400 (bad-request) response must be returned. Other appropriate - error codes can also be thrown.") - (bulk-delete [this collection-id options] "This function removes the given resources in the database where the @@ -157,15 +132,39 @@ error codes can also be thrown.") (create-timeseries - [this timeseries-id options] - "This function creates the given timeseries in the database.") + [this index options] + "This function creates a timeseries with the given index name in the database.") (retrieve-timeseries - [this timeseries-id] + [this index] "This function retrieves the identified timeseries from the database. On success, this returns the clojure map representation of the timeseries. The response must not be embedded in a ring response. On failure, this function must throw an ex-info containing the error - ring response. If the resource doesn't exist, use a 404 status.")) + ring response. If the resource doesn't exist, use a 404 status.") + + (add-timeseries-datapoint + [this index data options] + "This function adds the given timeseries datapoint to the database. + The datapoint with the given timestamp and dimensions must not already exist in the database. + + On success, the function must return a 201 ring response with the + relative URL of the new metric as the Location. + + On failure, the function must throw an ex-info containing the error + ring response. The error must be 409 (conflict) if the metric + exists already. Other appropriate error codes can also be thrown.") + + (bulk-insert-timeseries-datapoints + [this index data options] + "This function insert the given timeseries datapoints in the database. + + On success, the function must return the summary map of what was done + on the db. + + On failure, the function must throw an ex-info containing the error + ring response. If the resource-id does not correspond to a Collection, + then a 400 (bad-request) response must be returned. Other appropriate + error codes can also be thrown.")) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 119bfaf5c..e8bb24938 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -1,7 +1,8 @@ (ns sixsq.nuvla.db.es.binding "Binding protocol implemented for an Elasticsearch database that makes use of the Elasticsearch REST API." - (:require [clojure.tools.logging :as log] + (:require [clojure.data.json :as json] + [clojure.tools.logging :as log] [qbits.spandex :as spandex] [sixsq.nuvla.auth.utils.acl :as acl-utils] [sixsq.nuvla.db.binding :refer [Binding]] @@ -234,13 +235,12 @@ msg (str "unexpected exception querying: " (or error e))] (throw (r/ex-response msg 500)))))) -(defn add-metric-data - [client collection-id data {:keys [refresh] - :or {refresh true} - :as _options}] +(defn add-timeseries-datapoint + [client index data {:keys [refresh] + :or {refresh true} + :as _options}] (try - (let [index (escu/collection-id->index collection-id) - updated-data (-> data + (let [updated-data (-> data (dissoc :timestamp) (assoc "@timestamp" (:timestamp data))) response (spandex/request client {:url [index :_doc] @@ -251,20 +251,19 @@ (if success? {:status 201 :body {:status 201 - :message (str collection-id " metric added")}} - (r/response-conflict collection-id))) + :message (str index " metric added")}} + (r/response-conflict index))) (catch Exception e (let [{:keys [status body] :as _response} (ex-data e) error (:error body)] (if (= 409 status) - (r/response-conflict collection-id) + (r/response-conflict index) (r/response-error (str "unexpected exception: " (or error e)))))))) -(defn bulk-insert-metrics - [client collection-id data _options] +(defn bulk-insert-timeseries-datapoints + [client index data _options] (try - (let [index (escu/collection-id->index collection-id) - data-transform (fn [{:keys [timestamp] :as doc}] + (let [data-transform (fn [{:keys [timestamp] :as doc}] (-> doc (dissoc :timestamp) (assoc "@timestamp" timestamp))) @@ -517,12 +516,6 @@ (query-native [_ collection-id query] (query-data-native client collection-id query)) - (add-metric [_ collection-id data options] - (add-metric-data client collection-id data options)) - - (bulk-insert-metrics [_ collection-id data options] - (bulk-insert-metrics client collection-id data options)) - (bulk-delete [_ collection-id options] (bulk-delete-data client collection-id options)) @@ -535,6 +528,13 @@ (retrieve-timeseries [_ timeseries-id] (retrieve-timeseries-impl client timeseries-id)) + (add-timeseries-datapoint [_ index data options] + (add-timeseries-datapoint client index data options)) + + (bulk-insert-timeseries-datapoints [_ index data options] + (bulk-insert-timeseries-datapoints client index data options)) + + Closeable (close [_] (when sniffer diff --git a/code/src/sixsq/nuvla/db/impl.clj b/code/src/sixsq/nuvla/db/impl.clj index c817066ed..e111fdae2 100644 --- a/code/src/sixsq/nuvla/db/impl.clj +++ b/code/src/sixsq/nuvla/db/impl.clj @@ -45,24 +45,24 @@ (defn query-native [collection-id query] (p/query-native *impl* collection-id query)) -(defn add-metric [collection-id data & [options]] - (p/add-metric *impl* collection-id data options)) - -(defn bulk-insert-metrics [collection-id data & [options]] - (p/bulk-insert-metrics *impl* collection-id data options)) - (defn bulk-delete [collection-id & [options]] (p/bulk-delete *impl* collection-id options)) (defn bulk-edit [collection-id & [options]] (p/bulk-edit *impl* collection-id options)) -(defn create-timeseries [timeseries-id & [options]] - (p/create-timeseries *impl* timeseries-id options)) +(defn create-timeseries [index & [options]] + (p/create-timeseries *impl* index options)) (defn retrieve-timeseries [timeseries-id] (p/retrieve-timeseries *impl* timeseries-id)) +(defn add-timeseries-datapoint [index data & [options]] + (p/add-timeseries-datapoint *impl* index data options)) + +(defn bulk-insert-timeseries-datapoints [index data & [options]] + (p/bulk-insert-timeseries-datapoints *impl* index data options)) + (defn close [] (when-let [^Closeable impl *impl*] (try diff --git a/code/src/sixsq/nuvla/server/resources/common/std_crud.clj b/code/src/sixsq/nuvla/server/resources/common/std_crud.clj index 77062c875..8af7aa627 100644 --- a/code/src/sixsq/nuvla/server/resources/common/std_crud.clj +++ b/code/src/sixsq/nuvla/server/resources/common/std_crud.clj @@ -8,6 +8,7 @@ [clojure.walk :as w] [sixsq.nuvla.auth.acl-resource :as a] [sixsq.nuvla.auth.utils :as auth] + [sixsq.nuvla.db.es.common.utils :as escu] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.middleware.cimi-params.impl :as impl] [sixsq.nuvla.server.resources.common.crud :as crud] @@ -213,23 +214,24 @@ (str "_" resource-name))] (create-bulk-job action-name resource-name authn-info acl body)))) -(defn add-metric-fn +(defn add-timeseries-datapoint-fn [resource-name collection-acl _resource-uri & {:keys [validate-fn options]}] (validate-collection-acl collection-acl) (fn [{:keys [body] :as request}] (a/throw-cannot-add collection-acl request) (validate-fn body) - (db/add-metric resource-name body options))) + (db/add-timeseries-datapoint (escu/collection-id->index resource-name) + body options))) -(defn bulk-insert-metrics-fn - [resource-name collection-acl _collection-uri] +(defn bulk-insert-timeseries-datapoints-fn + [index collection-acl _collection-uri] (validate-collection-acl collection-acl) (fn [{:keys [body] :as request}] (throw-bulk-header-missing request) (a/throw-cannot-add collection-acl request) (a/throw-cannot-bulk-action collection-acl request) (let [options (select-keys request [:nuvla/authn :body]) - response (db/bulk-insert-metrics resource-name body options)] + response (db/bulk-insert-timeseries-datapoints index body options)] (r/json-response response)))) (defn generic-bulk-operation-fn diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index d97d59e9b..3ec5febdc 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -14,6 +14,7 @@ (s/def ::field-type (assoc (st/spec field-types) + :json-schema/type "string" :json-schema/description "Timeseries field name")) (s/def ::dimension @@ -31,13 +32,21 @@ (s/def ::metric-type (assoc (st/spec metric-types) + :json-schema/type "string" :json-schema/description "Timeseries metric type")) +(s/def ::optional + (-> (st/spec boolean?) + (assoc :name "optional" + :json-schema/type "boolean" + :json-schema/description "optional value ? (default false)"))) + (s/def ::metric (assoc (st/spec (su/only-keys :req-un [::field-name ::field-type - ::metric-type])) + ::metric-type] + :opt-un [::optional])) :json-schema/type "map" :json-schema/description "Timeseries metric")) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index 1a13f8fa4..c8eb74662 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -8,7 +8,9 @@ The `timeseries` resources represent a timeseries. [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.common.std-crud :as std-crud] [sixsq.nuvla.server.resources.common.utils :as u] - [sixsq.nuvla.server.resources.spec.timeseries :as timeseries])) + [sixsq.nuvla.server.resources.spec.timeseries :as timeseries] + [sixsq.nuvla.server.resources.timeseries.utils :as utils] + [sixsq.nuvla.server.util.response :as r])) (def ^:const resource-type (u/ns->type *ns*)) @@ -17,9 +19,9 @@ The `timeseries` resources represent a timeseries. (def ^:const collection-type (u/ns->collection-type *ns*)) -(def collection-acl {:query ["group/nuvla-admin"] - :add ["group/nuvla-admin"]}) - +(def collection-acl {:query ["group/nuvla-user"] + :add ["group/nuvla-user"] + :bulk-action ["group/nuvla-user"]}) ;; ;; "Implementations" of multimethod declared in crud namespace @@ -36,11 +38,6 @@ The `timeseries` resources represent a timeseries. [resource] (validate resource)) -(defn validate-metrics - [metrics] - (doseq [metric metrics] - (validate metric))) - ;; ;; use default ACL method ;; @@ -52,47 +49,10 @@ The `timeseries` resources represent a timeseries. (def add-impl (std-crud/add-fn resource-type collection-acl resource-type :validate-fn validate)) -(defn resource-id->timeseries-id - [resource-id] - (str "ts-" (u/id->uuid resource-id))) - -(defn dimension->es-property - [{:keys [field-name field-type]}] - [field-name {:type field-type - :time_series_dimension true}]) - -(defn metric->es-property - [{:keys [field-name field-type metric-type]}] - [field-name {:type field-type - :time_series_metric metric-type}]) - -(defn ts-resource->mappings - [{:keys [dimensions metrics]}] - {:properties - (into {"@timestamp" {:type "date" - :format "strict_date_optional_time||epoch_millis"}} - (concat - (map dimension->es-property dimensions) - (map metric->es-property metrics)))}) - -(defn ts-resource->routing-path - [{:keys [dimensions]}] - (mapv :field-name dimensions)) - -(defn create-timeseries - [resource-id] - (let [resource (crud/retrieve-by-id-as-admin resource-id) - mappings (ts-resource->mappings resource) - routing-path (ts-resource->routing-path resource)] - (db/create-timeseries - (resource-id->timeseries-id resource-id) - {:mappings mappings - :routing-path routing-path}))) - (defmethod crud/add resource-type [request] (let [{{:keys [resource-id]} :body :as response} (add-impl request)] - (create-timeseries resource-id) + (utils/create-timeseries resource-id) response)) @@ -111,14 +71,54 @@ The `timeseries` resources represent a timeseries. [request] (delete-impl request)) +;; +;; insert/bulk insert datapoints actions +;; + +(defmethod crud/do-action [resource-type utils/action-insert] + [{{uuid :uuid} :params body :body :as request}] + (try + (let [id (str resource-type "/" uuid) + timeseries-index (utils/resource-id->timeseries-index id) + timeseries (-> (crud/retrieve-by-id-as-admin id) + (a/throw-cannot-manage request))] + (->> body + (utils/add-timestamp) + (utils/validate-datapoint timeseries) + (db/add-timeseries-datapoint timeseries-index))) + (catch Exception e + (or (ex-data e) (throw e))))) + +(defmethod crud/do-action [resource-type utils/action-bulk-insert] + [{{uuid :uuid} :params body :body :as request}] + (std-crud/throw-bulk-header-missing request) + (try + (let [id (str resource-type "/" uuid) + timeseries-index (utils/resource-id->timeseries-index id) + timeseries (-> (crud/retrieve-by-id-as-admin id) + (a/throw-cannot-manage request))] + (->> body + (map utils/add-timestamp) + (utils/validate-datapoints timeseries) + (db/bulk-insert-timeseries-datapoints timeseries-index)) + (r/map-response "bulk insert of timeseries datapoints executed successfully" 200)) + (catch Exception e + (or (ex-data e) (throw e))))) ;; ;; available operations ;; (defmethod crud/set-operations resource-type - [resource _request] - resource) + [{:keys [id] :as resource} request] + (let [insert-op (u/action-map id utils/action-insert) + bulk-insert-op (u/action-map id utils/action-bulk-insert) + can-manage? (a/can-manage? resource request)] + (assoc resource + :operations + (cond-> [] + can-manage? + (conj insert-op bulk-insert-op))))) ;; @@ -127,19 +127,11 @@ The `timeseries` resources represent a timeseries. (def query-impl (std-crud/query-fn resource-type collection-acl collection-type)) - (defmethod crud/query resource-type [request] (query-impl request)) - -(def bulk-insert-impl (std-crud/bulk-insert-metrics-fn resource-type collection-acl collection-type)) - -(defmethod crud/bulk-action [resource-type "bulk-insert"] - [request] - (validate-metrics (:body request)) - (bulk-insert-impl request)) - (defn initialize [] (std-crud/initialize resource-type ::timeseries/schema)) + diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj new file mode 100644 index 000000000..bd98fc1d9 --- /dev/null +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -0,0 +1,118 @@ +(ns sixsq.nuvla.server.resources.timeseries.utils + (:require [clojure.set :as set] + [clojure.string :as str] + [sixsq.nuvla.db.impl :as db] + [sixsq.nuvla.server.resources.common.crud :as crud] + [sixsq.nuvla.server.resources.common.utils :as u] + [sixsq.nuvla.server.util.response :as r] + [sixsq.nuvla.server.util.time :as time])) + +(def action-insert "insert") +(def action-bulk-insert "bulk-insert") + +(defn resource-id->timeseries-index + [resource-id] + (str "ts-" (u/id->uuid resource-id))) + +(defn dimension->es-property + [{:keys [field-name field-type]}] + [field-name {:type field-type + :time_series_dimension true}]) + +(defn metric->es-property + [{:keys [field-name field-type metric-type]}] + [field-name {:type field-type + :time_series_metric metric-type}]) + +(defn ts-resource->mappings + [{:keys [dimensions metrics]}] + {:properties + (into {"@timestamp" {:type "date" + :format "strict_date_optional_time||epoch_millis"}} + (concat + (map dimension->es-property dimensions) + (map metric->es-property metrics)))}) + +(defn ts-resource->routing-path + [{:keys [dimensions]}] + (mapv :field-name dimensions)) + +(defn create-timeseries + [resource-id] + (let [resource (crud/retrieve-by-id-as-admin resource-id) + mappings (ts-resource->mappings resource) + routing-path (ts-resource->routing-path resource)] + (db/create-timeseries + (resource-id->timeseries-index resource-id) + {:mappings mappings + :routing-path routing-path}))) + +(defn throw-missing-dimensions + [{:keys [dimensions] :as _timeseries} datapoint] + (let [missing (->> dimensions + (filter #(nil? (get datapoint (keyword (:field-name %))))))] + (if (empty? missing) + datapoint + (throw (r/ex-response + (str "missing value for dimensions: " (str/join "," (map :field-name missing))) + 400))))) + +(defn throw-missing-mandatory-metrics + [{:keys [metrics] :as _timeseries} datapoint] + (let [missing (->> metrics + (filter #(not (:optional %))) + (filter #(nil? (get datapoint (keyword (:field-name %))))))] + (if (empty? missing) + datapoint + (throw (r/ex-response + (str "missing value for mandatory metrics: " (str/join "," (map :field-name missing))) + 400))))) + +(defn throw-wrong-type + [{:keys [field-name field-type] :as _field} field-value] + (let [check-type-fn (case field-type + "long" int? + "double" number? + "keyword" string?)] + (if (check-type-fn field-value) + field-value + (throw (r/ex-response + (str "a value with the wrong type was provided for field " field-name ": " field-value) + 400))))) + +(defn throw-wrong-types + [{:keys [dimensions metrics] :as _timeseries} datapoint] + (doseq [{:keys [field-name] :as field} (concat dimensions metrics)] + (throw-wrong-type field (get datapoint (keyword field-name)))) + datapoint) + +(defn throw-extra-keys + [{:keys [dimensions metrics] :as _timeseries} datapoint] + (let [extra-keys (set/difference (set (keys (dissoc datapoint :timestamp))) + (->> (concat dimensions metrics) + (map (comp keyword :field-name)) + set))] + (if (empty? extra-keys) + datapoint + (throw (r/ex-response + (str "unexpected keys: " (str/join "," extra-keys)) + 400))))) + +(defn validate-datapoint + [timeseries datapoint] + (->> datapoint + (throw-missing-dimensions timeseries) + (throw-missing-mandatory-metrics timeseries) + (throw-wrong-types timeseries) + (throw-extra-keys timeseries))) + +(defn validate-datapoints + [timeseries datapoints] + (doseq [datapoint datapoints] + (validate-datapoint timeseries datapoint)) + datapoints) + +(defn add-timestamp + [{:keys [timestamp] :as datapoint}] + (cond-> datapoint + (not timestamp) (assoc :timestamp (time/now-str)))) diff --git a/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_availability.clj b/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_availability.clj index 534e9a3ea..22c8cec3f 100644 --- a/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_availability.clj +++ b/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_availability.clj @@ -39,9 +39,9 @@ The `ts-nuvlaedge` resources create a timeseries related to nuvlaedge availabili [resource _request] resource) -(def add-impl (std-crud/add-metric-fn resource-type collection-acl resource-type - :validate-fn validate - :options {:refresh false})) +(def add-impl (std-crud/add-timeseries-datapoint-fn resource-type collection-acl resource-type + :validate-fn validate + :options {:refresh false})) (defmethod crud/add resource-type diff --git a/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry.clj b/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry.clj index 4c435c27d..9cd5cf53a 100644 --- a/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry.clj +++ b/code/src/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry.clj @@ -3,6 +3,7 @@ The `ts-nuvlaedge` resources create a timeseries related to nuvlaedge. " (:require + [sixsq.nuvla.db.es.common.utils :as escu] [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.common.std-crud :as std-crud] [sixsq.nuvla.server.resources.common.utils :as u] @@ -47,9 +48,9 @@ The `ts-nuvlaedge` resources create a timeseries related to nuvlaedge. [resource _request] resource) -(def add-impl (std-crud/add-metric-fn resource-type collection-acl resource-type - :validate-fn validate - :options {:refresh false})) +(def add-impl (std-crud/add-timeseries-datapoint-fn resource-type collection-acl resource-type + :validate-fn validate + :options {:refresh false})) (defmethod crud/add resource-type @@ -94,7 +95,9 @@ The `ts-nuvlaedge` resources create a timeseries related to nuvlaedge. (query-impl request)) -(def bulk-insert-impl (std-crud/bulk-insert-metrics-fn resource-type collection-acl collection-type)) +(def bulk-insert-impl (std-crud/bulk-insert-timeseries-datapoints-fn + (escu/collection-id->index resource-type) + collection-acl collection-type)) (defmethod crud/bulk-action [resource-type "bulk-insert"] [request] @@ -103,4 +106,4 @@ The `ts-nuvlaedge` resources create a timeseries related to nuvlaedge. (defn initialize [] - (std-crud/initialize-as-timeseries resource-type ::ts-nuvlaedge-telemetry/schema)) \ No newline at end of file + (std-crud/initialize-as-timeseries resource-type ::ts-nuvlaedge-telemetry/schema)) diff --git a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc index 62b7ccc72..3e706f824 100644 --- a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc +++ b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc @@ -20,7 +20,11 @@ :field-type "keyword"}] :metrics [{:field-name "test-metric" :field-type "long" - :metric-type "gauge"}]}] + :metric-type "gauge"} + {:field-name "test-optional-metric" + :field-type "long" + :metric-type "counter" + :optional true}]}] (stu/is-valid ::timeseries/schema valid-entry) (stu/is-invalid ::timeseries/schema (assoc valid-entry :unknown "value")) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 7a5177315..74f89801c 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -8,25 +8,36 @@ [sixsq.nuvla.server.middleware.authn-info :refer [authn-info-header]] [sixsq.nuvla.server.resources.common.utils :as u] [sixsq.nuvla.server.resources.lifecycle-test-utils :as ltu] - [sixsq.nuvla.server.resources.timeseries :as t])) + [sixsq.nuvla.server.resources.timeseries :as t] + [sixsq.nuvla.server.resources.timeseries.utils :as tu] + [sixsq.nuvla.server.util.time :as time])) (use-fixtures :each ltu/with-test-server-fixture) (def base-uri (str p/service-context t/resource-type)) -(deftest insert +(deftest lifecycle (let [session-anon (-> (ltu/ring-app) session (content-type "application/json")) + session-user (header session-anon authn-info-header + "user/jane user/jane group/nuvla-user group/nuvla-anon") session-admin (header session-anon authn-info-header "group/nuvla-admin group/nuvla-admin group/nuvla-user group/nuvla-anon") - entry {:dimensions [{:field-name "test-dimension" + dimension1 "test-dimension1" + metric1 "test-metric1" + metric2 "test-metric2" + entry {:dimensions [{:field-name dimension1 :field-type "keyword"}] - :metrics [{:field-name "test-metric" + :metrics [{:field-name metric1 + :field-type "double" + :metric-type "gauge"} + {:field-name metric2 :field-type "long" - :metric-type "gauge"}]} - ts-id (-> session-admin + :metric-type "counter" + :optional true}]} + ts-id (-> session-user (request base-uri :request-method :post :body (json/write-str entry)) @@ -34,17 +45,133 @@ (ltu/is-status 201) (ltu/location)) ts-url (str p/service-context ts-id) - ts-resource (-> session-admin + ts-response (-> session-user (request ts-url) (ltu/body->edn) (ltu/is-status 200) - (ltu/body)) - ts (db/retrieve-timeseries (t/resource-id->timeseries-id ts-id))] + (ltu/is-operation-present tu/action-insert)) + ts-resource (ltu/body ts-response) + ts (db/retrieve-timeseries (tu/resource-id->timeseries-index ts-id)) + now (time/now)] (is (= (assoc entry :id ts-id :resource-type "timeseries") (select-keys ts-resource [:resource-type :id :dimensions :metrics]))) - (is (pos? (count (:data_streams ts)))))) + (is (pos? (count (:data_streams ts)))) + + (testing "query timeseries" + (let [query-response (-> session-user + (request base-uri) + (ltu/body->edn) + (ltu/is-status 200) + (ltu/is-count 1) + (ltu/body))] + (is (= entry (-> query-response + :resources + first + (select-keys [:dimensions :metrics])))))) + + (testing "insert timeseries datapoint" + (let [datapoint {:timestamp (time/to-str now) + dimension1 "d1-val1" + metric1 3.14 + metric2 1000} + insert-op-url (ltu/get-op-url ts-response tu/action-insert)] + (testing "datapoint validation error: missing dimensions" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (dissoc datapoint dimension1))) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "missing value for dimensions: test-dimension1"))) + + (testing "datapoint validation error: missing value for mandatory metrics" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (dissoc datapoint metric1))) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "missing value for mandatory metrics: test-metric1"))) + + (testing "datapoint validation error: wrong field type provided" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (assoc datapoint dimension1 1000))) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "a value with the wrong type was provided for field test-dimension1: 1000")) + + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (assoc datapoint metric1 "wrong-type"))) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "a value with the wrong type was provided for field test-metric1: wrong-type"))) + + (testing "successful insert" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str datapoint)) + (ltu/body->edn) + (ltu/is-status 201))) + + (testing "insert same datapoint again -> conflict" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str datapoint)) + (ltu/body->edn) + (ltu/is-status 409))) + + (testing "timestamp is not mandatory" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (dissoc datapoint :timestamp))) + (ltu/body->edn) + (ltu/is-status 201))))) + + (testing "bulk insert timeseries datapoints" + (let [datapoints [{:timestamp (time/to-str now) + dimension1 "d1-val2" + metric1 10 + metric2 1} + {:timestamp (time/to-str now) + dimension1 "d1-val3" + metric1 20 + metric2 2}] + bulk-insert-op-url (ltu/get-op-url ts-response tu/action-bulk-insert)] + (testing "missing bulk header" + (-> session-user + (request bulk-insert-op-url + :request-method :post + :body (json/write-str datapoints)) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "Bulk request should contain bulk http header."))) + + (testing "successful bulk insert" + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str datapoints)) + (ltu/body->edn) + (ltu/is-status 200))) + + (testing "timestamp is not mandatory" + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str (map #(dissoc % :timestamp) datapoints))) + (ltu/body->edn) + (ltu/is-status 200))))))) (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] From ab6c799bb5372d5d621bc7c070130faecc35206d Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 16 Apr 2024 17:26:43 +0200 Subject: [PATCH 03/28] Fix test --- code/test/sixsq/nuvla/db/es/binding_test.clj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/test/sixsq/nuvla/db/es/binding_test.clj b/code/test/sixsq/nuvla/db/es/binding_test.clj index 2bbf1bb69..e1ee9ab66 100644 --- a/code/test/sixsq/nuvla/db/es/binding_test.clj +++ b/code/test/sixsq/nuvla/db/es/binding_test.clj @@ -114,7 +114,7 @@ :metric "ram" :ram {:used 0}}) (range 100))] - (t/bulk-insert-metrics client collection-id test-data-last-sec {})) + (t/bulk-insert-timeseries-datapoints client (escu/collection-id->index collection-id) test-data-last-sec {})) (spandex/request client {:url [:_refresh], :method :post}) (let [_response (-> (spandex/request client {:url (str "_data_stream/" index-name)}) (get-in [:body :data_streams])) From a70caa5b8a34fde2121cb31745d5b26a42042396 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 18 Apr 2024 15:25:14 +0200 Subject: [PATCH 04/28] Update and delete timeseries --- code/src/sixsq/nuvla/db/binding.clj | 11 +- code/src/sixsq/nuvla/db/es/binding.clj | 83 ++++++++++++- code/src/sixsq/nuvla/db/impl.clj | 6 + .../nuvla/server/resources/timeseries.clj | 24 +++- .../server/resources/timeseries/utils.clj | 43 ++++++- code/test/sixsq/nuvla/db/es/binding_test.clj | 6 +- .../resources/timeseries_lifecycle_test.clj | 114 +++++++++++++++++- 7 files changed, 269 insertions(+), 18 deletions(-) diff --git a/code/src/sixsq/nuvla/db/binding.clj b/code/src/sixsq/nuvla/db/binding.clj index 95121a29b..cdcc7c462 100644 --- a/code/src/sixsq/nuvla/db/binding.clj +++ b/code/src/sixsq/nuvla/db/binding.clj @@ -145,6 +145,11 @@ On failure, this function must throw an ex-info containing the error ring response. If the resource doesn't exist, use a 404 status.") + (edit-timeseries + [this index options] + "This function updates (edits) the given timeseries in the database. + The timeseries must already exist in the database.") + (add-timeseries-datapoint [this index data options] "This function adds the given timeseries datapoint to the database. @@ -167,4 +172,8 @@ On failure, the function must throw an ex-info containing the error ring response. If the resource-id does not correspond to a Collection, then a 400 (bad-request) response must be returned. Other appropriate - error codes can also be thrown.")) + error codes can also be thrown.") + + (delete-timeseries + [this index options] + "This function deletes a timeseries with the given index name from the database.")) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index e8bb24938..87aeb3864 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -376,7 +376,25 @@ error (:error body)] (log/error "unexpected status code when creating/updating" policy-name "ILM policy (" status "). " (or error e))))))) -(defn create-timeseries-template +(defn delete-lifecycle-policy + [client index] + (let [policy-name (str index "-ilm-policy")] + (try + (let [{:keys [status]} + (spandex/request + client + {:url [:_ilm :policy policy-name] + :method :delete})] + (if (= 200 status) + (do (log/debug policy-name "ILM policy deleted") + policy-name) + (log/error "unexpected status code when deleting" policy-name "ILM policy (" status ")"))) + (catch Exception e + (let [{:keys [status body] :as _response} (ex-data e) + error (:error body)] + (log/error "unexpected status code when deleting" policy-name "ILM policy (" status "). " (or error e))))))) + +(defn create-or-update-timeseries-template [client index mappings {:keys [routing-path look-back-time look-ahead-time start-time lifecycle-name]}] (let [template-name (str index "-template")] (try @@ -410,6 +428,22 @@ error (:error body)] (log/error "unexpected status code when creating/updating" template-name "index template (" status "). " (or error e))))))) +(defn delete-timeseries-template + [client index] + (let [template-name (str index "-template")] + (try + (let [{:keys [status]} (spandex/request client + {:url [:_index_template template-name], + :method :delete})] + (if (= 200 status) + (do (log/debug template-name "index template deleted") + template-name) + (log/error "unexpected status code when deleting" template-name "index template (" status ")"))) + (catch Exception e + (let [{:keys [status body] :as _response} (ex-data e) + error (:error body)] + (log/error "unexpected status code when deleting" template-name "index template (" status "). " (or error e))))))) + (defn create-datastream [client datastream-index-name] (try @@ -432,6 +466,19 @@ error (:error body)] (log/error "unexpected status code when creating" datastream-index-name "datastream (" status "). " (or error e))))))))) +(defn delete-datastream + [client datastream-index-name] + (try + (let [{:keys [status]} (spandex/request client {:url [:_data_stream datastream-index-name] + :method :delete})] + (if (= 200 status) + (log/debug datastream-index-name "datastream deleted") + (log/error "unexpected status code when deleting" datastream-index-name "datastream (" status ")"))) + (catch Exception e + (let [{:keys [status body] :as _response} (ex-data e) + error (:error body)] + (log/error "unexpected status code when deleting" datastream-index-name "datastream (" status "). " (or error e)))))) + (defn create-timeseries-impl [client timeseries-id {:keys [mappings @@ -444,8 +491,8 @@ look-back-time "7d"} :as _options}] (let [ilm-policy-name (create-or-update-lifecycle-policy client timeseries-id ilm-policy)] - (create-timeseries-template client timeseries-id mappings - {:routing-path routing-path + (create-or-update-timeseries-template client timeseries-id mappings + {:routing-path routing-path :lifecycle-name ilm-policy-name :look-ahead-time look-ahead-time :look-back-time look-back-time @@ -466,6 +513,30 @@ (throw (r/ex-not-found timeseries-id)) (throw e)))))) +(defn edit-timeseries-impl + [client timeseries-id + {:keys [mappings + routing-path + ilm-policy + look-back-time + look-ahead-time + start-time] + :as _options}] + (when ilm-policy + (create-or-update-lifecycle-policy client timeseries-id ilm-policy)) + (create-or-update-timeseries-template + client timeseries-id mappings + {:routing-path routing-path + :look-ahead-time look-ahead-time + :look-back-time look-back-time + :start-time start-time})) + +(defn delete-timeseries-impl + [client timeseries-id _options] + (delete-datastream client timeseries-id) + (delete-timeseries-template client timeseries-id) + (delete-lifecycle-policy client timeseries-id)) + (defn initialize-collection-timeseries [client collection-id {:keys [spec] :as options}] (let [timeseries-id (escu/collection-id->index collection-id) @@ -528,12 +599,18 @@ (retrieve-timeseries [_ timeseries-id] (retrieve-timeseries-impl client timeseries-id)) + (edit-timeseries [_ timeseries-id options] + (edit-timeseries-impl client timeseries-id options)) + (add-timeseries-datapoint [_ index data options] (add-timeseries-datapoint client index data options)) (bulk-insert-timeseries-datapoints [_ index data options] (bulk-insert-timeseries-datapoints client index data options)) + (delete-timeseries [_ timeseries-id options] + (delete-timeseries-impl client timeseries-id options)) + Closeable (close [_] diff --git a/code/src/sixsq/nuvla/db/impl.clj b/code/src/sixsq/nuvla/db/impl.clj index e111fdae2..4cfeff489 100644 --- a/code/src/sixsq/nuvla/db/impl.clj +++ b/code/src/sixsq/nuvla/db/impl.clj @@ -54,6 +54,9 @@ (defn create-timeseries [index & [options]] (p/create-timeseries *impl* index options)) +(defn edit-timeseries [index & [options]] + (p/edit-timeseries *impl* index options)) + (defn retrieve-timeseries [timeseries-id] (p/retrieve-timeseries *impl* timeseries-id)) @@ -63,6 +66,9 @@ (defn bulk-insert-timeseries-datapoints [index data & [options]] (p/bulk-insert-timeseries-datapoints *impl* index data options)) +(defn delete-timeseries [index & [options]] + (p/delete-timeseries *impl* index options)) + (defn close [] (when-let [^Closeable impl *impl*] (try diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index c8eb74662..b7df2d904 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -51,8 +51,10 @@ The `timeseries` resources represent a timeseries. (defmethod crud/add resource-type [request] - (let [{{:keys [resource-id]} :body :as response} (add-impl request)] - (utils/create-timeseries resource-id) + (let [{status :status {:keys [resource-id]} :body :as response} (add-impl request)] + (when (= 201 status) + (-> (crud/retrieve-by-id-as-admin resource-id) + (utils/create-timeseries))) response)) @@ -63,13 +65,29 @@ The `timeseries` resources represent a timeseries. [request] (retrieve-impl request)) +(def edit-impl (std-crud/edit-fn resource-type)) + +(defmethod crud/edit resource-type + [{{uuid :uuid} :params :as request}] + (let [current (-> (str resource-type "/" uuid) + crud/retrieve-by-id-as-admin + (a/throw-cannot-edit request)) + resp (-> request + (utils/throw-dimensions-can-only-be-appended current) + (utils/throw-metrics-can-only-be-added current) + edit-impl)] + (utils/edit-timeseries (:body resp)) + resp)) (def delete-impl (std-crud/delete-fn resource-type)) (defmethod crud/delete resource-type [request] - (delete-impl request)) + (let [{:keys [status] :as response} (delete-impl request)] + (when (= 200 status) + (utils/delete-timeseries (u/request->resource-id request))) + response)) ;; ;; insert/bulk insert datapoints actions diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj index bd98fc1d9..102503b39 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -38,15 +38,50 @@ (mapv :field-name dimensions)) (defn create-timeseries - [resource-id] - (let [resource (crud/retrieve-by-id-as-admin resource-id) - mappings (ts-resource->mappings resource) + [{:keys [id] :as resource}] + (let [mappings (ts-resource->mappings resource) routing-path (ts-resource->routing-path resource)] (db/create-timeseries - (resource-id->timeseries-index resource-id) + (resource-id->timeseries-index id) {:mappings mappings :routing-path routing-path}))) +(defn throw-dimensions-can-only-be-appended + [{{new-dimensions :dimensions} :body :as request} + {current-dimensions :dimensions :as _current}] + (when current-dimensions + (when-not (and (>= (count new-dimensions) (count current-dimensions)) + (= current-dimensions + (subvec new-dimensions 0 (count current-dimensions)))) + (throw (r/ex-response "dimensions can only be appended" 400)))) + request) + +(defn throw-metrics-can-only-be-added + [{{new-metrics :metrics} :body :as request} + {current-metrics :metrics :as _current}] + (when-not (every? (fn [{:keys [field-name] :as current-metric}] + (= current-metric + (->> new-metrics + (filter #(= field-name (:field-name %))) + first))) + current-metrics) + (throw (r/ex-response "metrics can only be added" 400))) + request) + +(defn edit-timeseries + [{:keys [id] :as resource}] + (let [mappings (ts-resource->mappings resource) + routing-path (ts-resource->routing-path resource)] + (db/edit-timeseries + (resource-id->timeseries-index id) + {:mappings mappings + :routing-path routing-path}))) + +(defn delete-timeseries + [resource-id] + (db/delete-timeseries + (resource-id->timeseries-index resource-id))) + (defn throw-missing-dimensions [{:keys [dimensions] :as _timeseries} datapoint] (let [missing (->> dimensions diff --git a/code/test/sixsq/nuvla/db/es/binding_test.clj b/code/test/sixsq/nuvla/db/es/binding_test.clj index e1ee9ab66..6061bb6e6 100644 --- a/code/test/sixsq/nuvla/db/es/binding_test.clj +++ b/code/test/sixsq/nuvla/db/es/binding_test.clj @@ -50,7 +50,7 @@ datastream-index-name "test-ts-index-1"] (testing "Create timeseries template" - (t/create-timeseries-template client index-name mapping {:routing-path routing-path}) + (t/create-or-update-timeseries-template client index-name mapping {:routing-path routing-path}) (let [response (-> (spandex/request client {:url (str "_index_template/" template-name)}) (get-in [:body :index_templates 0]))] (is (= template-name (:name response))) @@ -94,8 +94,8 @@ (is (= #{:hot :warm :delete} (set (keys phases)))) (testing "Create timeseries template with ilm policy" - (let [template-name (t/create-timeseries-template client index-name mapping - {:routing-path routing-path + (let [template-name (t/create-or-update-timeseries-template client index-name mapping + {:routing-path routing-path :start-time (time/to-str (time/minus (time/now) (time/duration-unit 20 :hours))) :lifecycle-name ilm-policy-name}) response (-> (spandex/request client {:url (str "_index_template/" template-name)}) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 74f89801c..ed90c90aa 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -37,6 +37,7 @@ :field-type "long" :metric-type "counter" :optional true}]} + ;; create timeseries ts-id (-> session-user (request base-uri :request-method :post @@ -45,13 +46,16 @@ (ltu/is-status 201) (ltu/location)) ts-url (str p/service-context ts-id) + ;; retrieve timeseries ts-response (-> session-user (request ts-url) (ltu/body->edn) (ltu/is-status 200) (ltu/is-operation-present tu/action-insert)) ts-resource (ltu/body ts-response) - ts (db/retrieve-timeseries (tu/resource-id->timeseries-index ts-id)) + ts-index (tu/resource-id->timeseries-index ts-id) + ts (db/retrieve-timeseries ts-index) + insert-op-url (ltu/get-op-url ts-response tu/action-insert) now (time/now)] (is (= (assoc entry :id ts-id @@ -75,8 +79,7 @@ (let [datapoint {:timestamp (time/to-str now) dimension1 "d1-val1" metric1 3.14 - metric2 1000} - insert-op-url (ltu/get-op-url ts-response tu/action-insert)] + metric2 1000}] (testing "datapoint validation error: missing dimensions" (-> session-user (request insert-op-url @@ -171,7 +174,110 @@ :request-method :post :body (json/write-str (map #(dissoc % :timestamp) datapoints))) (ltu/body->edn) - (ltu/is-status 200))))))) + (ltu/is-status 200))))) + + (testing "update timeseries" + (let [dimension2 "test-dimension2" + metric3 "test-metric3"] + (testing "removing existing dimensions is not allowed" + (let [nok-entry {:dimensions [{:field-name dimension2 + :field-type "keyword"}] + :metrics [{:field-name metric1 + :field-type "double" + :metric-type "gauge"} + {:field-name metric2 + :field-type "long" + :metric-type "counter" + :optional true}]}] + (-> session-user + (request ts-url + :request-method :put + :body (json/write-str nok-entry)) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "dimensions can only be appended")))) + + (testing "removing existing metrics is not allowed" + (let [nok-entry {:dimensions [{:field-name dimension1 + :field-type "keyword"}] + :metrics [{:field-name metric1 + :field-type "double" + :metric-type "gauge"} + {:field-name metric3 + :field-type "double" + :metric-type "gauge"}]}] + (-> session-user + (request ts-url + :request-method :put + :body (json/write-str nok-entry)) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "metrics can only be added")))) + + (testing "successful update - additional dimension and additional metric" + (let [updated-entry {:dimensions [{:field-name dimension1 + :field-type "keyword"} + {:field-name dimension2 + :field-type "keyword"}] + :metrics [{:field-name metric1 + :field-type "double" + :metric-type "gauge"} + {:field-name metric2 + :field-type "long" + :metric-type "counter" + :optional true} + {:field-name metric3 + :field-type "double" + :metric-type "gauge"}]}] + (-> session-user + (request ts-url + :request-method :put + :body (json/write-str updated-entry)) + (ltu/body->edn) + (ltu/is-status 200)) + + (testing "insert datapoint with updated schema" + (let [datapoint {:timestamp (time/now-str) + dimension1 "d1-val1" + dimension2 "d2-val1" + metric1 3.14 + metric2 1000 + metric3 12.34}] + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str datapoint)) + (ltu/body->edn) + (ltu/is-status 201)))) + + (testing "changing the order of existing dimensions is not allowed" + (let [nok-entry (assoc updated-entry :dimensions + [{:field-name dimension2 + :field-type "keyword"} + {:field-name dimension1 + :field-type "keyword"}])] + (-> session-user + (request ts-url + :request-method :put + :body (json/write-str nok-entry)) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "dimensions can only be appended")))))))) + + (testing "delete timeseries" + (-> session-user + (request ts-url :request-method :delete) + (ltu/body->edn) + (ltu/is-status 200)) + + ;; timeseries meta doc is deleted + (-> session-user + (request ts-url) + (ltu/body->edn) + (ltu/is-status 404)) + + ;; timeseries is also deleted + (is (thrown? Exception (db/retrieve-timeseries ts-index)))))) (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] From 424539197acf97e2c2bb77721be0165068c17757 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 18 Apr 2024 16:25:13 +0200 Subject: [PATCH 05/28] Fix test --- .../sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index ed90c90aa..743b3f619 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -281,5 +281,4 @@ (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] - (ltu/verify-405-status [[resource-uri :put] - [resource-uri :post]]))) + (ltu/verify-405-status [[resource-uri :post]]))) From 07971198813f9480ce175c5f68c703480ba884d9 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Fri, 19 Apr 2024 09:14:56 +0200 Subject: [PATCH 06/28] Add queries key to timeseries spec --- code/src/sixsq/nuvla/db/es/binding.clj | 3 +- .../server/resources/spec/timeseries.cljc | 58 +++++++++++++++++-- .../nuvla/server/resources/timeseries.clj | 18 ++++++ .../server/resources/timeseries/utils.clj | 2 +- .../resources/spec/timeseries_test.cljc | 28 ++++++++- 5 files changed, 101 insertions(+), 8 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 87aeb3864..9038c3107 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -1,8 +1,7 @@ (ns sixsq.nuvla.db.es.binding "Binding protocol implemented for an Elasticsearch database that makes use of the Elasticsearch REST API." - (:require [clojure.data.json :as json] - [clojure.tools.logging :as log] + (:require [clojure.tools.logging :as log] [qbits.spandex :as spandex] [sixsq.nuvla.auth.utils.acl :as acl-utils] [sixsq.nuvla.db.binding :refer [Binding]] diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index 3ec5febdc..b3c1bae8e 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -37,8 +37,7 @@ (s/def ::optional (-> (st/spec boolean?) - (assoc :name "optional" - :json-schema/type "boolean" + (assoc :json-schema/type "boolean" :json-schema/description "optional value ? (default false)"))) (s/def ::metric @@ -54,7 +53,58 @@ (-> (st/spec (s/coll-of ::metric :kind vector? :distinct true)) (assoc :json-schema/description "Timeseries metrics"))) +(s/def ::query-name + (assoc (st/spec ::core/nonblank-string) + :json-schema/description "Timeseries query name")) + +(def query-types #{"standard" "custom-es-query"}) + +(s/def ::query-type + (assoc (st/spec query-types) + :json-schema/type "string" + :json-schema/description "Timeseries query type")) + +(s/def ::aggregation-name + (assoc (st/spec ::core/nonblank-string) + :json-schema/description "Timeseries query aggregation name")) + +(def aggregation-types #{"avg" "min" "max"}) + +(s/def ::aggregation-type + (assoc (st/spec aggregation-types) + :json-schema/type "string" + :json-schema/description "Timeseries query aggregation type")) + +(s/def ::aggregation + (assoc (st/spec (su/only-keys + :req-un [::aggregation-name + ::aggregation-type + ::field-name])) + :json-schema/type "map" + :json-schema/description "Timeseries query aggregation specification")) + +(s/def ::aggregations + (-> (st/spec (s/coll-of ::aggregation :kind vector? :distinct true)) + (assoc :json-schema/description "Query aggregations"))) + +(s/def ::query + (assoc (st/spec (su/only-keys + :req-un [::aggregations])) + :json-schema/type "map" + :json-schema/description "Timeseries query")) + +(s/def ::query-definition + (assoc (st/spec (su/only-keys + :req-un [::query-name + ::query-type + ::query])) + :json-schema/type "map" + :json-schema/description "Timeseries query definition")) + +(s/def ::queries + (-> (st/spec (s/coll-of ::query-definition :kind vector? :distinct true)) + (assoc :json-schema/description "Queries supported by the timeseries"))) + (s/def ::schema (su/only-keys-maps common/common-attrs - {:req-un [::dimensions ::metrics]})) - + {:req-un [::dimensions ::metrics ::queries]})) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index b7df2d904..131980d95 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -123,6 +123,24 @@ The `timeseries` resources represent a timeseries. (catch Exception e (or (ex-data e) (throw e))))) +;; +;; data query action +;; + +(defmethod crud/do-action [resource-type utils/action-data] + [{{uuid :uuid} :params body :body :as request}] + (try + (let [id (str resource-type "/" uuid) + timeseries-index (utils/resource-id->timeseries-index id) + timeseries (-> (crud/retrieve-by-id-as-admin id) + (a/throw-cannot-manage request))] + (->> body + (utils/add-timestamp) + (utils/validate-datapoint timeseries) + (db/add-timeseries-datapoint timeseries-index))) + (catch Exception e + (or (ex-data e) (throw e))))) + ;; ;; available operations ;; diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj index 102503b39..8ea23c842 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -2,13 +2,13 @@ (:require [clojure.set :as set] [clojure.string :as str] [sixsq.nuvla.db.impl :as db] - [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.common.utils :as u] [sixsq.nuvla.server.util.response :as r] [sixsq.nuvla.server.util.time :as time])) (def action-insert "insert") (def action-bulk-insert "bulk-insert") +(def action-data "data") (defn resource-id->timeseries-index [resource-id] diff --git a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc index 3e706f824..a39f8e185 100644 --- a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc +++ b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc @@ -24,7 +24,33 @@ {:field-name "test-optional-metric" :field-type "long" :metric-type "counter" - :optional true}]}] + :optional true}] + :queries [{:query-name "test-metric-avg-query" + :query-type "standard" + :query {:aggregations [{:aggregation-name "test-metric-avg" + :aggregation-type "avg" + :field-name "test-metric"}]}} + {:query-name "test-metric-min-query" + :query-type "standard" + :query {:aggregations [{:aggregation-name "test-metric-min" + :aggregation-type "min" + :field-name "test-metric"}]}} + {:query-name "test-metric-max-query" + :query-type "standard" + :query {:aggregations [{:aggregation-name "test-metric-max" + :aggregation-type "max" + :field-name "test-metric"}]}} + {:query-name "test-metric-multi-query" + :query-type "standard" + :query {:aggregations [{:aggregation-name "test-metric-avg" + :aggregation-type "avg" + :field-name "test-metric"} + {:aggregation-name "test-metric-min" + :aggregation-type "min" + :field-name "test-metric"} + {:aggregation-name "test-metric-max" + :aggregation-type "max" + :field-name "test-metric"}]}}]}] (stu/is-valid ::timeseries/schema valid-entry) (stu/is-invalid ::timeseries/schema (assoc valid-entry :unknown "value")) From 24a7ee0fba1495c11cd0a790566c201ffdecf8da Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 07:21:27 +0200 Subject: [PATCH 07/28] crud/action timeseries/uuid data query --- code/src/sixsq/nuvla/db/binding.clj | 9 +- code/src/sixsq/nuvla/db/es/binding.clj | 15 +- code/src/sixsq/nuvla/db/impl.clj | 4 +- .../nuvla/server/resources/common/crud.clj | 3 +- .../server/resources/nuvlabox/data_utils.clj | 353 ++------------- .../server/resources/spec/timeseries.cljc | 10 +- .../nuvla/server/resources/timeseries.clj | 16 +- .../resources/timeseries/data_utils.clj | 426 ++++++++++++++++++ .../nuvlabox_status_2_lifecycle_test.clj | 8 +- .../resources/spec/timeseries_test.cljc | 5 +- .../resources/timeseries_lifecycle_test.clj | 206 +++++++-- 11 files changed, 680 insertions(+), 375 deletions(-) create mode 100644 code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj diff --git a/code/src/sixsq/nuvla/db/binding.clj b/code/src/sixsq/nuvla/db/binding.clj index cdcc7c462..e9f60f136 100644 --- a/code/src/sixsq/nuvla/db/binding.clj +++ b/code/src/sixsq/nuvla/db/binding.clj @@ -94,16 +94,13 @@ error codes can also be thrown.") (query-native - [this collection-id options] - "This function executes a native query, where the collection-id - corresponds to the name of a Collection. + [this index options] + "This function executes a native query against the given index. On success, the function must return the response body. On failure, the function must throw an ex-info containing the error - ring response. If the resource-id does not correspond to a Collection, - then a 400 (bad-request) response must be returned. Other appropriate - error codes can also be thrown.") + ring response.") (bulk-delete [this collection-id options] diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 9038c3107..20eda6168 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -180,9 +180,11 @@ {:timeout (str timeout "ms")})) (defn query-data - [client collection-id {:keys [cimi-params params] :as options}] + [client collection-id {:keys [cimi-params params no-prefix] :as options}] (try - (let [index (escu/collection-id->index collection-id) + (let [index (if no-prefix + collection-id + (escu/collection-id->index collection-id)) paging (paging/paging cimi-params) orderby (order/sorters cimi-params) aggregation (merge-with merge @@ -218,10 +220,9 @@ (throw (r/ex-response msg 500)))))) (defn query-data-native - [client collection-id query] + [client index query] (try - (let [index (escu/collection-id->index collection-id) - response (spandex/request client {:url [index :_search] + (let [response (spandex/request client {:url [index :_search] :method :post :body query})] (if (shards-successful? response) @@ -583,8 +584,8 @@ (query [_ collection-id options] (query-data client collection-id options)) - (query-native [_ collection-id query] - (query-data-native client collection-id query)) + (query-native [_ index query] + (query-data-native client index query)) (bulk-delete [_ collection-id options] (bulk-delete-data client collection-id options)) diff --git a/code/src/sixsq/nuvla/db/impl.clj b/code/src/sixsq/nuvla/db/impl.clj index 4cfeff489..78e5e607b 100644 --- a/code/src/sixsq/nuvla/db/impl.clj +++ b/code/src/sixsq/nuvla/db/impl.clj @@ -42,8 +42,8 @@ (defn query [collection-id & [options]] (p/query *impl* collection-id options)) -(defn query-native [collection-id query] - (p/query-native *impl* collection-id query)) +(defn query-native [index query] + (p/query-native *impl* index query)) (defn bulk-delete [collection-id & [options]] (p/bulk-delete *impl* collection-id options)) diff --git a/code/src/sixsq/nuvla/server/resources/common/crud.clj b/code/src/sixsq/nuvla/server/resources/common/crud.clj index 19b412466..80b517d3c 100644 --- a/code/src/sixsq/nuvla/server/resources/common/crud.clj +++ b/code/src/sixsq/nuvla/server/resources/common/crud.clj @@ -2,6 +2,7 @@ (:require [sixsq.nuvla.auth.acl-resource :as a] [sixsq.nuvla.auth.utils :as auth] + [sixsq.nuvla.db.es.common.utils :as escu] [sixsq.nuvla.db.filter.parser :as parser] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.resources.common.utils :as u] @@ -55,7 +56,7 @@ (defn query-native "Executes the database query as a native query." [collection-id query] - (db/query-native collection-id query)) + (db/query-native (escu/collection-id->index collection-id) query)) (defmulti bulk-delete resource-name-dispatch) diff --git a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj index 728118400..5c979cf2d 100644 --- a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj @@ -1,24 +1,22 @@ (ns sixsq.nuvla.server.resources.nuvlabox.data-utils (:require [clojure.data.csv :as csv] - [clojure.data.json :as json] [clojure.set :as set] [clojure.string :as str] [clojure.tools.logging :as log] [environ.core :as env] [promesa.core :as p] [promesa.exec :as px] - [ring.middleware.accept :refer [wrap-accept]] [sixsq.nuvla.auth.utils :as auth] [sixsq.nuvla.db.filter.parser :as parser] [sixsq.nuvla.server.middleware.cimi-params.impl :as cimi-params-impl] [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.common.utils :as u] [sixsq.nuvla.server.resources.nuvlabox.utils :as utils] + [sixsq.nuvla.server.resources.timeseries.data-utils :as ts-data-utils] [sixsq.nuvla.server.resources.ts-nuvlaedge-availability :as ts-nuvlaedge-availability] [sixsq.nuvla.server.resources.ts-nuvlaedge-telemetry :as ts-nuvlaedge-telemetry] [sixsq.nuvla.server.util.log :as logu] - [sixsq.nuvla.server.util.response :as r] [sixsq.nuvla.server.util.time :as time]) (:import (java.io StringWriter) @@ -26,7 +24,6 @@ (java.util Locale) (java.util.concurrent ExecutionException TimeoutException))) -(def max-data-points 200) (def running-query-data (atom 0)) (def requesting-query-data (atom 0)) (def query-data-max-attempts (env/env :query-data-max-attempts 50)) @@ -164,123 +161,17 @@ (catch Exception ex (log/error "An error occurred inserting metrics: " (ex-message ex))))) -(defn ->predefined-aggregations-resp - [{:keys [mode nuvlaedge-ids aggregations] group-by-field :group-by} resp] - (let [ts-data (fn [tsds-stats] - (map - (fn [{:keys [key_as_string doc_count] :as bucket}] - {:timestamp key_as_string - :doc-count doc_count - :aggregations (->> (keys aggregations) - (select-keys bucket))}) - (:buckets tsds-stats))) - dimensions (case mode - :single-edge-query - {:nuvlaedge-id (first nuvlaedge-ids)} - :multi-edge-query - {:nuvlaedge-count (count nuvlaedge-ids)}) - hits (second resp)] - (if group-by-field - (for [{:keys [key tsds-stats]} (get-in resp [0 :aggregations :by-field :buckets])] - (cond-> - {:dimensions (assoc dimensions group-by-field key) - :ts-data (ts-data tsds-stats)} - (seq hits) (assoc :hits hits))) - [(cond-> - {:dimensions dimensions - :ts-data (ts-data (get-in resp [0 :aggregations :tsds-stats]))} - (seq hits) (assoc :hits hits))]))) - -(defn ->custom-es-aggregations-resp - [{:keys [mode nuvlaedge-ids]} resp] - (let [ts-data (fn [tsds-stats] - (map - (fn [{:keys [key_as_string doc_count] :as bucket}] - {:timestamp key_as_string - :doc-count doc_count - :aggregations (dissoc bucket :key_as_string :key :doc_count)}) - (:buckets tsds-stats))) - dimensions (case mode - :single-edge-query - {:nuvlaedge-id (first nuvlaedge-ids)} - :multi-edge-query - {:nuvlaedge-count (count nuvlaedge-ids)})] - [(merge {:dimensions dimensions} - (into {} (for [agg-key (keys (get-in resp [0 :aggregations]))] - [agg-key (ts-data (get-in resp [0 :aggregations agg-key]))])))])) - -(defn ->raw-resp - [{:keys [mode nuvlaedge-ids]} resp] - (let [dimensions (case mode - :single-edge-query - {:nuvlaedge-id (first nuvlaedge-ids)} - :multi-edge-query - {:nuvlaedge-count (count nuvlaedge-ids)}) - hits (second resp)] - [{:dimensions dimensions - :ts-data (sort-by :timestamp hits)}])) - -(defn ->metrics-resp - [{:keys [predefined-aggregations custom-es-aggregations raw] :as options} resp] - (cond - predefined-aggregations - (->predefined-aggregations-resp options resp) - - raw - (->raw-resp options resp) - - custom-es-aggregations - (->custom-es-aggregations-resp options resp))) - -(defn build-aggregations-clause - [{:keys [predefined-aggregations raw custom-es-aggregations from to ts-interval aggregations] group-by-field :group-by}] - (cond - raw - {} ;; send an empty :tsds-aggregation to avoid acl checks. TODO: find a cleaner way - - predefined-aggregations - (let [tsds-aggregations {:tsds-stats - {:date_histogram - {:field "@timestamp" - :fixed_interval ts-interval - :min_doc_count 0 - :extended_bounds {:min (time/to-str from) - :max (time/to-str to)}} - :aggregations (or aggregations {})}}] - - (if group-by-field - {:aggregations - {:by-field - {:terms {:field group-by-field} - :aggregations tsds-aggregations}}} - {:aggregations tsds-aggregations})) - - custom-es-aggregations - {:aggregations custom-es-aggregations})) - -(defn build-ts-query [{:keys [last nuvlaedge-ids from to additional-filters orderby] :as options}] - (let [nuvlabox-id-filter (str "nuvlaedge-id=[" (str/join " " (map #(str "'" % "'") - nuvlaedge-ids)) - "]") - time-range-filter (str "@timestamp>'" (time/to-str from) "'" - " and " - "@timestamp<'" (time/to-str to) "'") - aggregation-clause (build-aggregations-clause options)] - (cond-> - {:cimi-params (cond-> - {:last (or last 0) - :filter - (parser/parse-cimi-filter - (str "(" - (apply str - (interpose " and " - (into [nuvlabox-id-filter - time-range-filter] - additional-filters))) - ")"))} - orderby (assoc :orderby orderby))} - aggregation-clause - (assoc :params {:tsds-aggregation (json/write-str aggregation-clause)})))) +(defn ->resp-dimensions + [{:keys [mode nuvlaedge-ids]}] + (case mode + :single-edge-query + {:nuvlaedge-id (first nuvlaedge-ids)} + :multi-edge-query + {:nuvlaedge-count (count nuvlaedge-ids)})) + +(defn build-ts-query [{:keys [nuvlaedge-ids] :as options}] + (ts-data-utils/build-ts-query + (assoc options :dimensions-filters {"nuvlaedge-id" nuvlaedge-ids}))) (defn build-availability-query [options] ;; return up to 10000 availability state updates @@ -290,7 +181,7 @@ [options] (->> (build-availability-query options) (crud/query-as-admin ts-nuvlaedge-availability/resource-type) - (->metrics-resp options))) + (ts-data-utils/->ts-query-resp (assoc options :->resp-dimensions-fn ->resp-dimensions)))) (defn query-availability-raw ([options] @@ -329,13 +220,13 @@ (defn build-telemetry-query [{:keys [raw metric] :as options}] (build-ts-query (-> options (assoc :additional-filters [(str "metric='" metric "'")]) - (cond-> raw (assoc :last max-data-points))))) + (cond-> raw (assoc :last ts-data-utils/max-data-points))))) (defn query-metrics [options] (->> (build-telemetry-query options) (crud/query-as-admin ts-nuvlaedge-telemetry/resource-type) - (->metrics-resp options))) + (ts-data-utils/->ts-query-resp (assoc options :->resp-dimensions-fn ->resp-dimensions)))) (defn latest-availability-status ([nuvlaedge-id] @@ -740,13 +631,13 @@ (defn csv-dimension-keys-fn [] - (fn [{:keys [raw predefined-aggregations datasets datasets-opts mode]}] + (fn [{:keys [raw predefined-aggregations queries query-specs mode]}] (cond raw [] predefined-aggregations - (let [{group-by-field :group-by} (get datasets-opts (first datasets)) + (let [{group-by-field :group-by} (get query-specs (first queries)) dimension-keys (case mode :single-edge-query [] @@ -768,8 +659,8 @@ (defn availability-csv-metric-keys-fn [] - (fn [{:keys [predefined-aggregations raw datasets datasets-opts]}] - (let [{:keys [response-aggs]} (get datasets-opts (first datasets))] + (fn [{:keys [predefined-aggregations raw queries query-specs]}] + (let [{:keys [response-aggs]} (get query-specs (first queries))] (cond raw [:online] predefined-aggregations response-aggs)))) @@ -793,9 +684,9 @@ (defn telemetry-csv-metric-keys-fn [metric] - (fn [{:keys [predefined-aggregations raw datasets datasets-opts resps]}] + (fn [{:keys [predefined-aggregations raw queries query-specs resps]}] (let [{:keys [aggregations response-aggs]} - (get datasets-opts (first datasets))] + (get query-specs (first queries))] (cond raw (sort (keys (-> resps ffirst :ts-data first (get metric)))) @@ -821,7 +712,7 @@ (telemetry-csv-metric-keys-fn metric) (telemetry-csv-data-fn metric))) -(defn single-edge-datasets +(defn single-edge-queries [] {"availability-stats" {:metric "availability" :pre-process-fn (comp filter-available-before-period-end @@ -1145,18 +1036,7 @@ ret)}]) (query-availability options))) -(defn keep-response-aggs-only - [{:keys [predefined-aggregations response-aggs] :as _query-opts} resp] - (cond-> - resp - predefined-aggregations - (update-resp-ts-data-point-aggs - (fn [_ts-data-point aggs] - (if response-aggs - (select-keys aggs response-aggs) - aggs))))) - -(defn multi-edge-datasets +(defn multi-edge-queries [] (let [group-by-field (fn [field aggs] {:terms {:field field} @@ -1266,183 +1146,41 @@ :response-aggs [:sum-energy-consumption] :csv-export-fn (telemetry-csv-export-fn :power-consumption)}})) -(defn parse-params - [{:keys [uuid dataset from to granularity custom-es-aggregations] :as params} - {:keys [accept] :as _request}] - (let [datasets (if (coll? dataset) dataset [dataset]) - raw (= "raw" granularity) - predefined-aggregations (not (or raw custom-es-aggregations)) - custom-es-aggregations (cond-> custom-es-aggregations - (string? custom-es-aggregations) - json/read-str)] - (-> params - (assoc :mime-type (:mime accept)) - (assoc :datasets datasets) - (assoc :from (time/parse-date from)) - (assoc :to (time/parse-date to)) - (cond-> - uuid (assoc :id (u/resource-id "nuvlabox" uuid)) - raw (assoc :raw true) - predefined-aggregations (assoc :predefined-aggregations true) - custom-es-aggregations (assoc :custom-es-aggregations custom-es-aggregations))))) - -(defn throw-response-format-not-supported - [{:keys [mime-type] :as params}] - (when-not mime-type - (logu/log-and-throw-400 406 "Not Acceptable")) - params) - -(defn throw-mandatory-dataset-parameter - [{:keys [datasets] :as params}] - (when-not (seq datasets) (logu/log-and-throw-400 "dataset parameter is mandatory")) - params) - -(defn throw-mandatory-from-to-parameters - [{:keys [from to] :as params}] - (when-not from - (logu/log-and-throw-400 (str "from parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)"))) - (when-not to - (logu/log-and-throw-400 (str "to parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)"))) - params) - -(defn throw-from-not-before-to - [{:keys [from to] :as params}] - (when-not (time/before? from to) - (logu/log-and-throw-400 "from must be before to")) - params) - -(defn throw-mandatory-granularity-parameter - [{:keys [raw granularity custom-es-aggregations] :as params}] - (when (and (not raw) (not custom-es-aggregations) (empty? granularity)) - (logu/log-and-throw-400 "granularity parameter is mandatory")) - params) - -(defn throw-custom-es-aggregations-checks - [{:keys [custom-es-aggregations granularity] :as params}] - (when custom-es-aggregations - (when granularity - (logu/log-and-throw-400 "when custom-es-aggregations is specified, granularity parameter must be omitted"))) - params) - -(defn throw-too-many-data-points - [{:keys [from to granularity predefined-aggregations] :as params}] - (when predefined-aggregations - (let [max-n-buckets max-data-points - n-buckets (.dividedBy (time/duration from to) - (granularity->duration granularity))] - (when (> n-buckets max-n-buckets) - (logu/log-and-throw-400 "too many data points requested. Please restrict the time interval or increase the time granularity.")))) - params) +(defn assoc-nuvlabox-id + [{:keys [uuid] :as params}] + (cond-> params + uuid (assoc :id (u/resource-id "nuvlabox" uuid)))) -(defn granularity->ts-interval - "Converts from a string of the form - to an ElasticSearch interval string" - [granularity] - (let [[_ n unit] (re-matches #"(.*)-(.*)" (name granularity))] - (str n (case unit - "seconds" "s" - "minutes" "m" - "hours" "h" - "days" "d" - "weeks" "d" - "months" "M" - (logu/log-and-throw-400 (str "unrecognized value for granularity " granularity)))))) - -(defn assoc-base-query-opts - [{:keys [predefined-aggregations granularity filter] :as params} request] - (assoc params - :base-query-opts - (-> (select-keys params [:id :from :to :granularity - :raw :custom-es-aggregations :predefined-aggregations - :mode :int-atom]) - (assoc :request request) - (cond-> - filter - (assoc :cimi-filter filter) - predefined-aggregations - (assoc :ts-interval (granularity->ts-interval granularity)))))) - -(defn assoc-datasets-opts +(defn assoc-query-specs [{:keys [mode] :as params}] (assoc params - :datasets-opts + :query-specs (case mode - :single-edge-query (single-edge-datasets) - :multi-edge-query (multi-edge-datasets)))) - -(defn throw-unknown-datasets - [{:keys [datasets datasets-opts] :as params}] - (when-not (every? (set (keys datasets-opts)) datasets) - (logu/log-and-throw-400 (str "unknown datasets: " - (str/join "," (sort (set/difference (set datasets) - (set (keys datasets-opts)))))))) - params) + :single-edge-query (single-edge-queries) + :multi-edge-query (multi-edge-queries)))) -(defn throw-csv-multi-dataset - [{:keys [datasets mime-type] :as params}] - (when (and (= "text/csv" mime-type) (not= 1 (count datasets))) - (logu/log-and-throw-400 (str "exactly one dataset must be specified with accept header 'text/csv'"))) - params) - -(defn run-query - [base-query-opts datasets-opts dataset-key] - (let [{:keys [pre-process-fn query-fn post-process-fn] :as dataset-opts} (get datasets-opts dataset-key) - {:keys [predefined-aggregations] :as query-opts} (merge base-query-opts dataset-opts) - query-opts (if pre-process-fn (doall (pre-process-fn query-opts)) query-opts)] - (cond->> (doall (query-fn query-opts)) - post-process-fn ((fn [resp] (doall (second (post-process-fn [query-opts resp]))))) - predefined-aggregations (keep-response-aggs-only query-opts)))) - -(defn run-queries - [{:keys [datasets base-query-opts datasets-opts] :as params}] - (assoc params - :resps - (map (partial run-query base-query-opts datasets-opts) datasets))) - -(defn json-data-response - [{:keys [datasets resps]}] - (r/json-response (zipmap datasets resps))) - -(defn csv-response - [{:keys [datasets datasets-opts] :as options}] - (let [{:keys [csv-export-fn]} (get datasets-opts (first datasets))] - (when-not csv-export-fn - (logu/log-and-throw-400 (str "csv export not supported for dataset " (first datasets)))) - (r/csv-response "export.csv" (csv-export-fn options)))) - -(defn send-data-response - [{:keys [mime-type] :as options}] - (case mime-type - "application/json" - (json-data-response options) - "text/csv" - (csv-response options))) +(defn assoc-query + [{:keys [dataset] :as params}] + (cond-> params + dataset (assoc :query dataset))) (defn query-data [params request] (-> params - (parse-params request) - (throw-response-format-not-supported) - (throw-mandatory-dataset-parameter) - (throw-mandatory-from-to-parameters) - (throw-from-not-before-to) - (throw-mandatory-granularity-parameter) - (throw-too-many-data-points) - (throw-custom-es-aggregations-checks) - (assoc-base-query-opts request) - (assoc-datasets-opts) - (throw-unknown-datasets) - (throw-csv-multi-dataset) - (run-queries) - (send-data-response))) + (assoc-nuvlabox-id) + (assoc-query) + (assoc-query-specs) + (ts-data-utils/query-data request))) (defn gated-query-data "Only allow one call to query-data on availability of multiple edges at a time. Allow max 4 additional requests to wait at most 5 seconds to get access to computation." - [{:keys [mode dataset] :as params} request] - (let [datasets (if (coll? dataset) dataset [dataset])] + [{:keys [mode query dataset] :as params} request] + (let [query (or query dataset) + queries (if (coll? query) query [query])] (if (and (= :multi-edge-query mode) - (some #{"availability-stats" "availability-by-edge"} datasets)) + (some #{"availability-stats" "availability-by-edge"} queries)) (if (> @requesting-query-data 4) (logu/log-and-throw 503 "Server too busy") ;; retry for up to 5 seconds (or QUERY_DATA_MAX_ATTEMPTS * 100ms) @@ -1478,9 +1216,6 @@ (defn wrapped-query-data [params request] - (let [query-data (wrap-accept (partial gated-query-data params) - {:mime ["application/json" :qs 1 - "text/csv" :qs 0.5]})] + (let [query-data (ts-data-utils/wrap-query-data-accept (partial gated-query-data params))] (query-data request))) - diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index b3c1bae8e..e3108eae3 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -93,11 +93,14 @@ :json-schema/type "map" :json-schema/description "Timeseries query")) +(s/def ::custom-es-query any?) + (s/def ::query-definition (assoc (st/spec (su/only-keys :req-un [::query-name - ::query-type - ::query])) + ::query-type] + :opt-un [::query + ::custom-es-query])) :json-schema/type "map" :json-schema/description "Timeseries query definition")) @@ -107,4 +110,5 @@ (s/def ::schema (su/only-keys-maps common/common-attrs - {:req-un [::dimensions ::metrics ::queries]})) + {:req-un [::dimensions ::metrics] + :opt-un [::queries]})) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index 131980d95..bf764c9c5 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -9,6 +9,7 @@ The `timeseries` resources represent a timeseries. [sixsq.nuvla.server.resources.common.std-crud :as std-crud] [sixsq.nuvla.server.resources.common.utils :as u] [sixsq.nuvla.server.resources.spec.timeseries :as timeseries] + [sixsq.nuvla.server.resources.timeseries.data-utils :as data-utils] [sixsq.nuvla.server.resources.timeseries.utils :as utils] [sixsq.nuvla.server.util.response :as r])) @@ -128,16 +129,9 @@ The `timeseries` resources represent a timeseries. ;; (defmethod crud/do-action [resource-type utils/action-data] - [{{uuid :uuid} :params body :body :as request}] + [{{uuid :uuid :as body} :body :keys [params] :as request}] (try - (let [id (str resource-type "/" uuid) - timeseries-index (utils/resource-id->timeseries-index id) - timeseries (-> (crud/retrieve-by-id-as-admin id) - (a/throw-cannot-manage request))] - (->> body - (utils/add-timestamp) - (utils/validate-datapoint timeseries) - (db/add-timeseries-datapoint timeseries-index))) + (data-utils/wrapped-query-data params request) (catch Exception e (or (ex-data e) (throw e))))) @@ -149,12 +143,13 @@ The `timeseries` resources represent a timeseries. [{:keys [id] :as resource} request] (let [insert-op (u/action-map id utils/action-insert) bulk-insert-op (u/action-map id utils/action-bulk-insert) + data-op (u/action-map id utils/action-data) can-manage? (a/can-manage? resource request)] (assoc resource :operations (cond-> [] can-manage? - (conj insert-op bulk-insert-op))))) + (conj insert-op bulk-insert-op data-op))))) ;; @@ -170,4 +165,3 @@ The `timeseries` resources represent a timeseries. (defn initialize [] (std-crud/initialize resource-type ::timeseries/schema)) - diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj new file mode 100644 index 000000000..5521a0acb --- /dev/null +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -0,0 +1,426 @@ +(ns sixsq.nuvla.server.resources.timeseries.data-utils + (:require [clojure.data.json :as json] + [clojure.set :as set] + [clojure.string :as str] + [ring.middleware.accept :refer [wrap-accept]] + [sixsq.nuvla.auth.acl-resource :as a] + [sixsq.nuvla.db.filter.parser :as parser] + [sixsq.nuvla.db.impl :as db] + [sixsq.nuvla.server.resources.common.crud :as crud] + [sixsq.nuvla.server.resources.timeseries.utils :as utils] + [sixsq.nuvla.server.util.log :as logu] + [sixsq.nuvla.server.util.response :as r] + [sixsq.nuvla.server.util.time :as time])) + +(def max-data-points 200) + +(defn update-resp-ts-data + [resp f] + (-> resp + vec + (update-in [0 :ts-data] (comp vec f)))) + +(defn update-resp-ts-data-points + [resp f] + (update-resp-ts-data + resp + (fn [ts-data] + (mapv f ts-data)))) + +(defn update-resp-ts-data-point-aggs + [resp f] + (update-resp-ts-data-points + resp + (fn [ts-data-point] + (update ts-data-point :aggregations (partial f ts-data-point))))) + +(defn granularity->duration + "Converts from a string of the form - to java.time duration" + [granularity] + (let [[_ n unit] (re-matches #"(.*)-(.*)" (name granularity))] + (try + (time/duration (Integer/parseInt n) (keyword unit)) + (catch Exception _ + (logu/log-and-throw-400 (str "unrecognized value for granularity " granularity)))))) + +(defn keep-response-aggs-only + [{:keys [predefined-aggregations response-aggs] :as _query-opts} resp] + (cond-> + resp + predefined-aggregations + (update-resp-ts-data-point-aggs + (fn [_ts-data-point aggs] + (if response-aggs + (select-keys aggs response-aggs) + aggs))))) + +(defn parse-params + [{:keys [query from to granularity custom-es-aggregations] :as params} + {:keys [accept] :as _request}] + (let [queries (if (coll? query) query [query]) + raw (= "raw" granularity) + predefined-aggregations (not (or raw custom-es-aggregations)) + custom-es-aggregations (cond-> custom-es-aggregations + (string? custom-es-aggregations) + json/read-str)] + (-> params + (assoc :mime-type (:mime accept)) + (assoc :queries queries) + (assoc :from (time/parse-date from)) + (assoc :to (time/parse-date to)) + (cond-> + raw (assoc :raw true) + predefined-aggregations (assoc :predefined-aggregations true) + custom-es-aggregations (assoc :custom-es-aggregations custom-es-aggregations))))) + +(defn throw-response-format-not-supported + [{:keys [mime-type] :as params}] + (when-not mime-type + (logu/log-and-throw-400 406 "Not Acceptable")) + params) + +(defn throw-mandatory-query-parameter + [{:keys [queries] :as params}] + (when-not (seq queries) (logu/log-and-throw-400 "query parameter is mandatory")) + params) + +(defn throw-mandatory-from-to-parameters + [{:keys [from to] :as params}] + (when-not from + (logu/log-and-throw-400 (str "from parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)"))) + (when-not to + (logu/log-and-throw-400 (str "to parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)"))) + params) + +(defn throw-from-not-before-to + [{:keys [from to] :as params}] + (when-not (time/before? from to) + (logu/log-and-throw-400 "from must be before to")) + params) + +(defn throw-mandatory-granularity-parameter + [{:keys [raw granularity custom-es-aggregations] :as params}] + (when (and (not raw) (not custom-es-aggregations) (empty? granularity)) + (logu/log-and-throw-400 "granularity parameter is mandatory")) + params) + +(defn throw-custom-es-aggregations-checks + [{:keys [custom-es-aggregations granularity] :as params}] + (when custom-es-aggregations + (when granularity + (logu/log-and-throw-400 "when custom-es-aggregations is specified, granularity parameter must be omitted"))) + params) + +(defn throw-too-many-data-points + [{:keys [from to granularity predefined-aggregations] :as params}] + (when predefined-aggregations + (let [max-n-buckets max-data-points + n-buckets (.dividedBy (time/duration from to) + (granularity->duration granularity))] + (when (> n-buckets max-n-buckets) + (logu/log-and-throw-400 "too many data points requested. Please restrict the time interval or increase the time granularity.")))) + params) + +(defn granularity->ts-interval + "Converts from a string of the form - to an ElasticSearch interval string" + [granularity] + (let [[_ n unit] (re-matches #"(.*)-(.*)" (name granularity))] + (str n (case unit + "seconds" "s" + "minutes" "m" + "hours" "h" + "days" "d" + "weeks" "d" + "months" "M" + (logu/log-and-throw-400 (str "unrecognized value for granularity " granularity)))))) + +(defn assoc-request + [params request] + (assoc params :request request)) + +(defn assoc-cimi-filter + [{:keys [filter] :as params}] + (cond-> params filter (assoc :cimi-filter filter))) + +(defn assoc-ts-interval + [{:keys [predefined-aggregations granularity] :as params}] + (cond-> params + predefined-aggregations + (assoc :ts-interval (granularity->ts-interval granularity)))) + +(defn throw-unknown-queries + [{:keys [queries query-specs] :as params}] + (when-not (every? (set (keys query-specs)) queries) + (logu/log-and-throw-400 (str "unknown queries: " + (str/join "," (sort (set/difference (set queries) + (set (keys query-specs)))))))) + params) + +(defn throw-csv-multi-query + [{:keys [queries mime-type] :as params}] + (when (and (= "text/csv" mime-type) (not= 1 (count queries))) + (logu/log-and-throw-400 (str "exactly one query must be specified with accept header 'text/csv'"))) + params) + +(defn run-query + [params query-specs query-key] + (let [{:keys [pre-process-fn query-fn post-process-fn] :as query-spec} (get query-specs query-key) + {:keys [predefined-aggregations] :as query-opts} (merge params query-spec) + query-opts (if pre-process-fn (doall (pre-process-fn query-opts)) query-opts)] + (cond->> (doall (query-fn query-opts)) + post-process-fn ((fn [resp] (doall (second (post-process-fn [query-opts resp]))))) + predefined-aggregations (keep-response-aggs-only query-opts)))) + +(defn run-queries + [{:keys [queries query-specs] :as params}] + (assoc params + :resps + (map (partial run-query params query-specs) queries))) + +(defn json-data-response + [{:keys [queries resps]}] + (r/json-response (zipmap queries resps))) + +(defn csv-response + [{:keys [queries query-specs] :as options}] + (let [{:keys [csv-export-fn]} (get query-specs (first queries))] + (when-not csv-export-fn + (logu/log-and-throw-400 (str "csv export not supported for query " (first queries)))) + (r/csv-response "export.csv" (csv-export-fn options)))) + +(defn send-data-response + [{:keys [mime-type] :as options}] + (case mime-type + "application/json" + (json-data-response options) + "text/csv" + (csv-response options))) + +(defn query-data + [params request] + (-> params + (parse-params request) + (throw-response-format-not-supported) + (throw-mandatory-query-parameter) + (throw-mandatory-from-to-parameters) + (throw-from-not-before-to) + (throw-mandatory-granularity-parameter) + (throw-too-many-data-points) + (throw-custom-es-aggregations-checks) + (assoc-request request) + (assoc-cimi-filter) + (assoc-ts-interval) + (throw-unknown-queries) + (throw-csv-multi-query) + (run-queries) + (send-data-response))) + +(defn wrap-query-data-accept + [handler] + (wrap-accept handler + {:mime ["application/json" :qs 1 + "text/csv" :qs 0.5]})) + +(defn assoc-timeseries + [{uuid :uuid :as params} request] + (let [id (str "timeseries/" uuid) + timeseries-index (utils/resource-id->timeseries-index id) + timeseries (-> (crud/retrieve-by-id-as-admin id) + (a/throw-cannot-manage request))] + (assoc params + :timeseries-index timeseries-index + :timeseries timeseries))) + +(defn ->resp-dimensions + [{:keys [timeseries dimensions-filters]}] + (->> (for [{:keys [field-name]} (:dimensions timeseries)] + (let [v (get dimensions-filters field-name)] + (cond + (nil? v) + {field-name "all"} + + (= (count v) 1) + {field-name (first v)} + + (pos? v) + {field-name {:count (count v)}}))) + (into {}))) + +(defn ->predefined-aggregations-resp + [{:keys [aggregations ->resp-dimensions-fn] group-by-field :group-by :as params} resp] + (let [ts-data (fn [tsds-stats] + (map + (fn [{:keys [key_as_string doc_count] :as bucket}] + {:timestamp key_as_string + :doc-count doc_count + :aggregations (->> (keys aggregations) + (map keyword) + (select-keys bucket))}) + (:buckets tsds-stats))) + hits (second resp)] + (if group-by-field + (for [{:keys [key tsds-stats]} (get-in resp [0 :aggregations :by-field :buckets])] + (cond-> + {:dimensions (assoc (->resp-dimensions-fn params) group-by-field key) + :ts-data (ts-data tsds-stats)} + (seq hits) (assoc :hits hits))) + [(cond-> + {:dimensions (->resp-dimensions-fn params) + :ts-data (ts-data (get-in resp [0 :aggregations :tsds-stats]))} + (seq hits) (assoc :hits hits))]))) + +(defn ->custom-es-aggregations-resp + [{:keys [->resp-dimensions-fn] :as params} resp] + (let [ts-data (fn [tsds-stats] + (map + (fn [{:keys [key_as_string doc_count] :as bucket}] + {:timestamp key_as_string + :doc-count doc_count + :aggregations (dissoc bucket :key_as_string :key :doc_count)}) + (:buckets tsds-stats)))] + [(merge {:dimensions (->resp-dimensions-fn params)} + (into {} (for [agg-key (keys (get-in resp [0 :aggregations]))] + [agg-key (ts-data (get-in resp [0 :aggregations agg-key]))])))])) + +(defn ->raw-resp + [{:keys [->resp-dimensions-fn] :as params} resp] + (let [hits (second resp)] + [{:dimensions (->resp-dimensions-fn params) + :ts-data (sort-by :timestamp hits)}])) + +(defn ->ts-query-resp + [{:keys [predefined-aggregations custom-es-aggregations raw] :as params} resp] + (cond + predefined-aggregations + (->predefined-aggregations-resp params resp) + + raw + (->raw-resp params resp) + + custom-es-aggregations + (->custom-es-aggregations-resp params resp))) + +(defn build-aggregations-clause + [{:keys [predefined-aggregations raw custom-es-aggregations from to ts-interval aggregations] group-by-field :group-by}] + (cond + raw + {} ;; send an empty :tsds-aggregation to avoid acl checks. TODO: find a cleaner way + + predefined-aggregations + (let [tsds-aggregations {:tsds-stats + {:date_histogram + {:field "@timestamp" + :fixed_interval ts-interval + :min_doc_count 0 + :extended_bounds {:min (time/to-str from) + :max (time/to-str to)}} + :aggregations (or aggregations {})}}] + (if group-by-field + {:aggregations + {:by-field + {:terms {:field group-by-field} + :aggregations tsds-aggregations}}} + {:aggregations tsds-aggregations})) + + custom-es-aggregations + {:aggregations custom-es-aggregations})) + +(defn dimension-filter->cimi-filter + [[dimension values]] + (str dimension "=[" (str/join " " (map #(str "'" % "'") values)) "]")) + +(defn build-ts-query [{:keys [last dimensions-filters from to additional-filters orderby] :as options}] + (let [time-range-filter (str "@timestamp>'" (time/to-str from) "'" + " and " + "@timestamp<'" (time/to-str to) "'") + aggregation-clause (build-aggregations-clause options)] + (cond-> + {:cimi-params (cond-> + {:last (or last 0) + :filter + (parser/parse-cimi-filter + (str "(" + (apply str + (interpose " and " + (concat [time-range-filter] + (map dimension-filter->cimi-filter dimensions-filters) + additional-filters))) + ")"))} + orderby (assoc :orderby orderby))} + aggregation-clause + (assoc :params {:tsds-aggregation (json/write-str aggregation-clause)})))) + +(defn build-query [{:keys [raw] :as options}] + (-> (build-ts-query (cond-> options + raw (assoc :last max-data-points))) + (assoc :no-prefix true))) + +(defn generic-query-fn + [{:keys [timeseries-index] :as params}] + (->> (build-query params) + (crud/query-as-admin timeseries-index) + (->ts-query-resp (assoc params :->resp-dimensions-fn ->resp-dimensions)))) + +(defmulti ts-query->query-spec (fn [{:keys [query-type]}] query-type)) + +(defmethod ts-query->query-spec :default + [{:keys [query-type]}] + (logu/log-and-throw-400 (str "unrecognized query type " query-type))) + +(defn parse-aggregations + [aggregations] + (->> aggregations + (map (fn [{:keys [aggregation-name aggregation-type field-name]}] + [aggregation-name {(keyword aggregation-type) {:field field-name}}])) + (into {}))) + +(defmethod ts-query->query-spec "standard" + [{:keys [query] :as _ts-query}] + {:query-fn generic-query-fn + :aggregations (some-> query :aggregations parse-aggregations) + ; :csv-export-fn (telemetry-csv-export-fn :cpu) + }) + +(defmethod ts-query->query-spec "custom-es-query" + [{:keys [custom-es-query] :as _ts-query}] + {:query-fn generic-query-fn + :aggregations (some-> custom-es-query :aggregations) + ; :csv-export-fn (telemetry-csv-export-fn :cpu) + }) + +(defn assoc-query-specs + [{:keys [timeseries] :as params}] + (let [query-specs (-> (get timeseries :queries) + (->> (group-by :query-name)) + (update-vals (comp ts-query->query-spec first)))] + (cond-> params + query-specs (assoc :query-specs query-specs)))) + +(defn parse-dimension-filter + [s] + (let [[_ dimension value] (re-matches #"(.*)=(.*)" s)] + [dimension value])) + +(defn assoc-dimensions-filters + [{:keys [dimension-filter] :as params}] + (let [dimension-filter (when dimension-filter + (if (coll? dimension-filter) dimension-filter [dimension-filter])) + dimensions-filters (-> (->> dimension-filter + (map parse-dimension-filter) + (group-by first)) + (update-vals #(map second %)))] + (cond-> params + dimensions-filters (assoc :dimensions-filters dimensions-filters)))) + +(defn generic-ts-query-data + [params request] + (-> params + (assoc-timeseries request) + (assoc-query-specs) + (assoc-dimensions-filters) + (query-data request))) + +(defn wrapped-query-data + [params request] + (let [query-data (wrap-query-data-accept (partial generic-ts-query-data params))] + (query-data request))) diff --git a/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj index b2e95c65d..4e15bea51 100644 --- a/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj @@ -819,7 +819,7 @@ (ltu/body->edn) (ltu/body) :message))] - (is (= "exactly one dataset must be specified with accept header 'text/csv'" + (is (= "exactly one query must be specified with accept header 'text/csv'" (invalid-request {:accept-header "text/csv" :datasets ["cpu-stats" "network-stats"] :from (time/minus now (time/duration-unit 1 :days)) @@ -846,7 +846,7 @@ :from now :to now :granularity "1-days"}))) - (is (= "unknown datasets: invalid-1,invalid-2" + (is (= "unknown queries: invalid-1,invalid-2" (invalid-request {:datasets ["invalid-1" "cpu-stats" "invalid-2"] :from (time/minus now (time/duration-unit 1 :days)) :to now @@ -1273,7 +1273,7 @@ (ltu/body->edn) (ltu/body) :message))] - (is (= "exactly one dataset must be specified with accept header 'text/csv'" + (is (= "exactly one query must be specified with accept header 'text/csv'" (invalid-request {:accept-header "text/csv" :datasets ["cpu-stats" "network-stats"] :from (time/minus now (time/duration-unit 1 :days)) @@ -1284,7 +1284,7 @@ :from now :to now :granularity "1-days"}))) - (is (= "unknown datasets: invalid-1,invalid-2" + (is (= "unknown queries: invalid-1,invalid-2" (invalid-request {:datasets ["invalid-1" "cpu-stats" "invalid-2"] :from (time/minus now (time/duration-unit 1 :days)) :to now diff --git a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc index a39f8e185..358401866 100644 --- a/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc +++ b/code/test/sixsq/nuvla/server/resources/spec/timeseries_test.cljc @@ -56,4 +56,7 @@ (stu/is-invalid ::timeseries/schema (assoc valid-entry :unknown "value")) (doseq [attr #{:metrics :dimensions}] - (stu/is-invalid ::timeseries/schema (dissoc valid-entry attr))))) + (stu/is-invalid ::timeseries/schema (dissoc valid-entry attr))) + + (doseq [attr #{:queries}] + (stu/is-valid ::timeseries/schema (dissoc valid-entry attr))))) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 743b3f619..1be44a7e8 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -3,6 +3,7 @@ [clojure.data.json :as json] [clojure.test :refer [deftest is testing use-fixtures]] [peridot.core :refer [content-type header request session]] + [ring.util.codec :as rc] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.app.params :as p] [sixsq.nuvla.server.middleware.authn-info :refer [authn-info-header]] @@ -16,6 +17,47 @@ (def base-uri (str p/service-context t/resource-type)) +(def dimension1 "test-dimension1") + +(def metric1 "test-metric1") +(def metric2 "test-metric2") + +(def query1 "test-query1") +(def query2 "test-query2") +(def aggregation1 "test-metric1-avg") + +(def valid-entry {:dimensions [{:field-name dimension1 + :field-type "keyword"}] + :metrics [{:field-name metric1 + :field-type "double" + :metric-type "gauge"} + {:field-name metric2 + :field-type "long" + :metric-type "counter" + :optional true}] + :queries [{:query-name query1 + :query-type "standard" + :query {:aggregations [{:aggregation-name aggregation1 + :aggregation-type "avg" + :field-name metric1}]}} + {:query-name query2 + :query-type "custom-es-query" + :custom-es-query {:aggregations + {:agg1 {:date_histogram + {:field "@timestamp" + :fixed_interval "1d" + :min_doc_count 0} + :aggregations {:custom-agg {:stats {:field metric1}}}}}}}]}) + +(defn create-timeseries + [session entry] + (-> session + (request base-uri + :request-method :post + :body (json/write-str entry)) + (ltu/body->edn) + (ltu/is-status 201) + (ltu/location))) (deftest lifecycle (let [session-anon (-> (ltu/ring-app) @@ -23,28 +65,8 @@ (content-type "application/json")) session-user (header session-anon authn-info-header "user/jane user/jane group/nuvla-user group/nuvla-anon") - session-admin (header session-anon authn-info-header - "group/nuvla-admin group/nuvla-admin group/nuvla-user group/nuvla-anon") - dimension1 "test-dimension1" - metric1 "test-metric1" - metric2 "test-metric2" - entry {:dimensions [{:field-name dimension1 - :field-type "keyword"}] - :metrics [{:field-name metric1 - :field-type "double" - :metric-type "gauge"} - {:field-name metric2 - :field-type "long" - :metric-type "counter" - :optional true}]} ;; create timeseries - ts-id (-> session-user - (request base-uri - :request-method :post - :body (json/write-str entry)) - (ltu/body->edn) - (ltu/is-status 201) - (ltu/location)) + ts-id (create-timeseries session-user valid-entry) ts-url (str p/service-context ts-id) ;; retrieve timeseries ts-response (-> session-user @@ -57,10 +79,10 @@ ts (db/retrieve-timeseries ts-index) insert-op-url (ltu/get-op-url ts-response tu/action-insert) now (time/now)] - (is (= (assoc entry + (is (= (assoc valid-entry :id ts-id :resource-type "timeseries") - (select-keys ts-resource [:resource-type :id :dimensions :metrics]))) + (select-keys ts-resource [:resource-type :id :dimensions :metrics :queries]))) (is (pos? (count (:data_streams ts)))) (testing "query timeseries" @@ -70,16 +92,16 @@ (ltu/is-status 200) (ltu/is-count 1) (ltu/body))] - (is (= entry (-> query-response - :resources - first - (select-keys [:dimensions :metrics])))))) + (is (= valid-entry (-> query-response + :resources + first + (select-keys [:dimensions :metrics :queries])))))) (testing "insert timeseries datapoint" - (let [datapoint {:timestamp (time/to-str now) - dimension1 "d1-val1" - metric1 3.14 - metric2 1000}] + (let [datapoint {:timestamp (time/to-str now) + dimension1 "d1-val1" + metric1 3.14 + metric2 1000}] (testing "datapoint validation error: missing dimensions" (-> session-user (request insert-op-url @@ -279,6 +301,128 @@ ;; timeseries is also deleted (is (thrown? Exception (db/retrieve-timeseries ts-index)))))) +(deftest query + (let [session-anon (-> (ltu/ring-app) + session + (content-type "application/json")) + session-user (header session-anon authn-info-header + "user/jane user/jane group/nuvla-user group/nuvla-anon") + ts-id (create-timeseries session-user valid-entry) + ts-url (str p/service-context ts-id) + ;; retrieve timeseries + ts-response (-> session-user + (request ts-url) + (ltu/body->edn) + (ltu/is-status 200) + (ltu/is-operation-present tu/action-insert)) + bulk-insert-op-url (ltu/get-op-url ts-response tu/action-bulk-insert) + data-op-url (ltu/get-op-url ts-response tu/action-data) + + now (time/now) + now-1h (time/minus now (time/duration-unit 1 :hours)) + d1-val1 "d1q-val1" + d1-val2 "d1q-val2" + datapoints [{:timestamp (time/to-str now-1h) + dimension1 d1-val1 + metric1 10.0 + metric2 1} + {:timestamp (time/to-str now-1h) + dimension1 d1-val2 + metric1 20.0 + metric2 2}]] + + (testing "successful bulk insert" + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str datapoints)) + (ltu/body->edn) + (ltu/is-status 200))) + + (ltu/refresh-es-indices) + + (testing "Query metrics" + (let [midnight-today (time/truncated-to-days now) + midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) + metrics-request (fn [{:keys [dimensions-filters queries from from-str to to-str granularity accept-header]}] + (-> session-user + (content-type "application/x-www-form-urlencoded") + (cond-> accept-header (header "accept" accept-header)) + (request data-op-url + :body (rc/form-encode + (cond-> + {:query queries + :from (if from (time/to-str from) from-str) + :to (if to (time/to-str to) to-str)} + dimensions-filters (assoc :dimension-filter dimensions-filters) + granularity (assoc :granularity granularity))))))] + (testing "basic query" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:queries [query1] + :from from + :to to + :granularity "1-days"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) "all"} + :ts-data [{:timestamp (time/to-str midnight-yesterday) + :doc-count 0 + :aggregations {(keyword aggregation1) {:value nil}}} + {:timestamp (time/to-str midnight-today) + :doc-count 2 + :aggregations {(keyword aggregation1) {:value 15.0}}}]}] + (get metric-data (keyword query1)))))) + (testing "basic query with dimension filter" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:dimensions-filters [(str dimension1 "=" d1-val1)] + :queries [query1] + :from from + :to to + :granularity "1-days"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) d1-val1} + :ts-data [{:timestamp (time/to-str midnight-yesterday) + :doc-count 0 + :aggregations {(keyword aggregation1) {:value nil}}} + {:timestamp (time/to-str midnight-today) + :doc-count 1 + :aggregations {(keyword aggregation1) {:value 10.0}}}]}] + (get metric-data (keyword query1)))))) + (testing "raw query" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:queries [query1] + :from from + :to to + :granularity "raw"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) "all"} + :ts-data (set (map #(update-keys % keyword) datapoints))}] + (-> (get metric-data (keyword query1)) + (update-in [0 :ts-data] set)))))) + #_(testing "custom es query" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:queries [query2] + :from from + :to to + :granularity "1-days"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) "all"} + :ts-data (set (map #(update-keys % keyword) datapoints))}] + (-> (get metric-data (keyword query1)) + (update-in [0 :ts-data] set)))))))))) + (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] (ltu/verify-405-status [[resource-uri :post]]))) From 0aa6869d5542ed45ce67de4775304e97b00669a1 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 08:31:28 +0200 Subject: [PATCH 08/28] Fix mapping of ::custom-es-query --- code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index e3108eae3..7564aef5f 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -93,7 +93,10 @@ :json-schema/type "map" :json-schema/description "Timeseries query")) -(s/def ::custom-es-query any?) +(s/def ::custom-es-query + (-> (st/spec (su/constrained-map keyword? any?)) + (assoc :json-schema/type "map" + :json-schema/description "custom ElasticSearch query"))) (s/def ::query-definition (assoc (st/spec (su/only-keys From 2617e745dbdf6cc973cff75ca28f74d4d6219bf9 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 10:42:46 +0200 Subject: [PATCH 09/28] Optional metrics can be omitted --- .../src/sixsq/nuvla/server/resources/timeseries/utils.clj | 3 ++- .../nuvla/server/resources/timeseries_lifecycle_test.clj | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj index 8ea23c842..eee92887f 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -118,7 +118,8 @@ (defn throw-wrong-types [{:keys [dimensions metrics] :as _timeseries} datapoint] (doseq [{:keys [field-name] :as field} (concat dimensions metrics)] - (throw-wrong-type field (get datapoint (keyword field-name)))) + (some->> (get datapoint (keyword field-name)) + (throw-wrong-type field))) datapoint) (defn throw-extra-keys diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 1be44a7e8..fe45ec99d 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -159,6 +159,14 @@ :request-method :post :body (json/write-str (dissoc datapoint :timestamp))) (ltu/body->edn) + (ltu/is-status 201))) + + (testing "optional metrics can be omitted" + (-> session-user + (request insert-op-url + :request-method :post + :body (json/write-str (dissoc datapoint :timestamp metric2))) + (ltu/body->edn) (ltu/is-status 201))))) (testing "bulk insert timeseries datapoints" From 229084de5bf38bddafcec2a33ceff946fddec777 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 14:26:14 +0200 Subject: [PATCH 10/28] Added check on dimension filters --- .../server/resources/timeseries/data_utils.clj | 14 +++++++++++++- .../server/resources/timeseries_lifecycle_test.clj | 12 ++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index 5521a0acb..0bcb2c31e 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -242,7 +242,7 @@ (= (count v) 1) {field-name (first v)} - (pos? v) + (pos? (count v)) {field-name {:count (count v)}}))) (into {}))) @@ -412,12 +412,24 @@ (cond-> params dimensions-filters (assoc :dimensions-filters dimensions-filters)))) +(defn throw-invalid-dimensions + [{:keys [dimensions-filters] {:keys [dimensions]} :timeseries :as params}] + (let [dimensions-filters-keys (set (keys dimensions-filters)) + dimensions-field-names (set (map :field-name dimensions))] + (when (seq dimensions-filters-keys) + (when-not (set/subset? dimensions-filters-keys dimensions-field-names) + (throw (r/ex-response (str "invalid dimensions: " + (str/join "," (set/difference dimensions-filters-keys dimensions-field-names))) + 400))))) + params) + (defn generic-ts-query-data [params request] (-> params (assoc-timeseries request) (assoc-query-specs) (assoc-dimensions-filters) + (throw-invalid-dimensions) (query-data request))) (defn wrapped-query-data diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index fe45ec99d..a261fd893 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -402,6 +402,18 @@ :doc-count 1 :aggregations {(keyword aggregation1) {:value 10.0}}}]}] (get metric-data (keyword query1)))))) + (testing "basic query with wrong dimension filter" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now] + (-> (metrics-request {:dimensions-filters ["wrong-dimension=w1" + "wrong-dimension=w2"] + :queries [query1] + :from from + :to to + :granularity "1-days"}) + (ltu/body->edn) + (ltu/is-status 400) + (ltu/is-key-value :message "invalid dimensions: wrong-dimension")))) (testing "raw query" (let [from (time/minus now (time/duration-unit 1 :days)) to now From 3d727866bd725c9c5ff9ef8079e84668558549e3 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 16:11:30 +0200 Subject: [PATCH 11/28] At least one metric and one dimension required --- .../server/resources/spec/timeseries.cljc | 4 +- .../resources/timeseries_lifecycle_test.clj | 45 ++++++++++++------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index 7564aef5f..d69e34b61 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -25,7 +25,7 @@ :json-schema/description "Timeseries dimension")) (s/def ::dimensions - (-> (st/spec (s/coll-of ::dimension :kind vector? :distinct true)) + (-> (st/spec (s/coll-of ::dimension :kind vector? :distinct true :min-count 1)) (assoc :json-schema/description "Timeseries dimensions"))) (def metric-types #{"gauge" "counter"}) @@ -50,7 +50,7 @@ :json-schema/description "Timeseries metric")) (s/def ::metrics - (-> (st/spec (s/coll-of ::metric :kind vector? :distinct true)) + (-> (st/spec (s/coll-of ::metric :kind vector? :distinct true :min-count 1)) (assoc :json-schema/description "Timeseries metrics"))) (s/def ::query-name diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index a261fd893..b18437bee 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -85,6 +85,21 @@ (select-keys ts-resource [:resource-type :id :dimensions :metrics :queries]))) (is (pos? (count (:data_streams ts)))) + (testing "invalid timeseries creation attempts" + (-> session-user + (request base-uri + :request-method :post + :body (json/write-str (assoc valid-entry :dimensions []))) + (ltu/body->edn) + (ltu/is-status 400)) + + (-> session-user + (request base-uri + :request-method :post + :body (json/write-str (assoc valid-entry :metrics []))) + (ltu/body->edn) + (ltu/is-status 400))) + (testing "query timeseries" (let [query-response (-> session-user (request base-uri) @@ -403,8 +418,8 @@ :aggregations {(keyword aggregation1) {:value 10.0}}}]}] (get metric-data (keyword query1)))))) (testing "basic query with wrong dimension filter" - (let [from (time/minus now (time/duration-unit 1 :days)) - to now] + (let [from (time/minus now (time/duration-unit 1 :days)) + to now] (-> (metrics-request {:dimensions-filters ["wrong-dimension=w1" "wrong-dimension=w2"] :queries [query1] @@ -429,19 +444,19 @@ (-> (get metric-data (keyword query1)) (update-in [0 :ts-data] set)))))) #_(testing "custom es query" - (let [from (time/minus now (time/duration-unit 1 :days)) - to now - metric-data (-> (metrics-request {:queries [query2] - :from from - :to to - :granularity "1-days"}) - (ltu/is-status 200) - (ltu/body->edn) - (ltu/body))] - (is (= [{:dimensions {(keyword dimension1) "all"} - :ts-data (set (map #(update-keys % keyword) datapoints))}] - (-> (get metric-data (keyword query1)) - (update-in [0 :ts-data] set)))))))))) + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:queries [query2] + :from from + :to to + :granularity "1-days"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) "all"} + :ts-data (set (map #(update-keys % keyword) datapoints))}] + (-> (get metric-data (keyword query1)) + (update-in [0 :ts-data] set)))))))))) (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] From 0233f153d6084fad45eb87c057929f9ce3baa7ee Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 17:05:39 +0200 Subject: [PATCH 12/28] Update timeseries write index mapping on timeseries update --- code/src/sixsq/nuvla/db/es/binding.clj | 40 +++++++++++++------ .../resources/timeseries_lifecycle_test.clj | 7 ++++ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 20eda6168..db667192c 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -466,10 +466,21 @@ error (:error body)] (log/error "unexpected status code when creating" datastream-index-name "datastream (" status "). " (or error e))))))))) +(defn edit-datastream + [client datastream-index-name new-mappings] + (let [{{:keys [acknowledged]} :body} + (spandex/request client {:url [datastream-index-name :_mapping] + :query-string {:write_index_only true} + :method :put + :body new-mappings})] + (if acknowledged + (log/info datastream-index-name "datastream updated") + (log/warn datastream-index-name "datastream may or may not have been updated")))) + (defn delete-datastream [client datastream-index-name] (try - (let [{:keys [status]} (spandex/request client {:url [:_data_stream datastream-index-name] + (let [{:keys [status]} (spandex/request client {:url [:_data_stream datastream-index-name] :method :delete})] (if (= 200 status) (log/debug datastream-index-name "datastream deleted") @@ -493,10 +504,10 @@ (let [ilm-policy-name (create-or-update-lifecycle-policy client timeseries-id ilm-policy)] (create-or-update-timeseries-template client timeseries-id mappings {:routing-path routing-path - :lifecycle-name ilm-policy-name - :look-ahead-time look-ahead-time - :look-back-time look-back-time - :start-time start-time}) + :lifecycle-name ilm-policy-name + :look-ahead-time look-ahead-time + :look-back-time look-back-time + :start-time start-time}) (create-datastream client timeseries-id))) (defn retrieve-timeseries-impl @@ -521,15 +532,18 @@ look-back-time look-ahead-time start-time] + :or {ilm-policy hot-warm-cold-delete-policy + look-back-time "7d"} :as _options}] - (when ilm-policy - (create-or-update-lifecycle-policy client timeseries-id ilm-policy)) - (create-or-update-timeseries-template - client timeseries-id mappings - {:routing-path routing-path - :look-ahead-time look-ahead-time - :look-back-time look-back-time - :start-time start-time})) + (let [ilm-policy-name (create-or-update-lifecycle-policy client timeseries-id ilm-policy)] + (create-or-update-timeseries-template + client timeseries-id mappings + {:routing-path routing-path + :lifecycle-name ilm-policy-name + :look-ahead-time look-ahead-time + :look-back-time look-back-time + :start-time start-time})) + (edit-datastream client timeseries-id mappings)) (defn delete-timeseries-impl [client timeseries-id _options] diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index b18437bee..75fca3df3 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -3,6 +3,7 @@ [clojure.data.json :as json] [clojure.test :refer [deftest is testing use-fixtures]] [peridot.core :refer [content-type header request session]] + [qbits.spandex :as spandex] [ring.util.codec :as rc] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.app.params :as p] @@ -281,6 +282,12 @@ (ltu/body->edn) (ltu/is-status 200)) + (testing "check that the write index mapping has been updated with the new metric" + (let [client (ltu/es-client) + ts-index-meta (spandex/request client {:url [ts-index], :method :get})] + (is (some? (-> ts-index-meta :body first second :mappings :properties + (get (keyword metric3))))))) + (testing "insert datapoint with updated schema" (let [datapoint {:timestamp (time/now-str) dimension1 "d1-val1" From d6e6b62af88b847b48546e12e15663ed6f2fcb80 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Tue, 23 Apr 2024 18:39:53 +0200 Subject: [PATCH 13/28] Remove unused bindings --- code/src/sixsq/nuvla/server/resources/timeseries.clj | 2 +- code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index bf764c9c5..b778a2c18 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -129,7 +129,7 @@ The `timeseries` resources represent a timeseries. ;; (defmethod crud/do-action [resource-type utils/action-data] - [{{uuid :uuid :as body} :body :keys [params] :as request}] + [{:keys [params] :as request}] (try (data-utils/wrapped-query-data params request) (catch Exception e diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index 0bcb2c31e..b22d8e30d 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -5,7 +5,6 @@ [ring.middleware.accept :refer [wrap-accept]] [sixsq.nuvla.auth.acl-resource :as a] [sixsq.nuvla.db.filter.parser :as parser] - [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.timeseries.utils :as utils] [sixsq.nuvla.server.util.log :as logu] From 8ff660bcddc76fb4c655a835b2278017dca73e3d Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 25 Apr 2024 13:33:08 +0200 Subject: [PATCH 14/28] Rollover indexes when mapping changes --- code/src/sixsq/nuvla/db/es/binding.clj | 24 ++++++++++++++++++- .../nuvla/server/resources/timeseries.clj | 7 +++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index db667192c..6658752ac 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -466,13 +466,35 @@ error (:error body)] (log/error "unexpected status code when creating" datastream-index-name "datastream (" status "). " (or error e))))))))) +(defn datastream-mappings + [client datastream-index-name] + (-> (spandex/request client {:url [datastream-index-name :_mapping], :method :get}) + :body seq first second :mappings :properties)) + +(defn datastream-rollover + [client datastream-index-name] + (try + (let [{{:keys [acknowledged]} :body} + (spandex/request client {:url [datastream-index-name :_rollover] + :method :post})] + (if acknowledged + (log/info datastream-index-name "rollover executed successfully") + (log/warn datastream-index-name "rollover may or may not have executed"))) + (catch Exception e + (let [{:keys [status body] :as _response} (ex-data e) + error (:error body)] + (log/error "unexpected status code when executing datastream rollover operation" datastream-index-name "datastream (" status "). " (or error e)))))) + (defn edit-datastream [client datastream-index-name new-mappings] - (let [{{:keys [acknowledged]} :body} + (let [current-mappings (datastream-mappings client datastream-index-name) + {{:keys [acknowledged]} :body} (spandex/request client {:url [datastream-index-name :_mapping] :query-string {:write_index_only true} :method :put :body new-mappings})] + (when-not (= current-mappings (datastream-mappings client datastream-index-name)) + (datastream-rollover client datastream-index-name)) (if acknowledged (log/info datastream-index-name "datastream updated") (log/warn datastream-index-name "datastream may or may not have been updated")))) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index b778a2c18..87cd4ade3 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -141,15 +141,16 @@ The `timeseries` resources represent a timeseries. (defmethod crud/set-operations resource-type [{:keys [id] :as resource} request] - (let [insert-op (u/action-map id utils/action-insert) + (let [delete-op (u/operation-map id :delete) + insert-op (u/action-map id utils/action-insert) bulk-insert-op (u/action-map id utils/action-bulk-insert) data-op (u/action-map id utils/action-data) can-manage? (a/can-manage? resource request)] (assoc resource :operations (cond-> [] - can-manage? - (conj insert-op bulk-insert-op data-op))))) + (a/can-delete? resource request) (conj delete-op) + can-manage? (conj insert-op bulk-insert-op data-op))))) ;; From a8d9e8cc578bb14cc5094973bc4291f9dca1d856 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 25 Apr 2024 15:27:09 +0200 Subject: [PATCH 15/28] Rollover indexes when mapping changes --- code/src/sixsq/nuvla/db/es/binding.clj | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 6658752ac..7813c0389 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -468,8 +468,8 @@ (defn datastream-mappings [client datastream-index-name] - (-> (spandex/request client {:url [datastream-index-name :_mapping], :method :get}) - :body seq first second :mappings :properties)) + (->> (spandex/request client {:url [datastream-index-name :_mapping], :method :get}) + :body seq (sort-by first) last second :mappings :properties)) (defn datastream-rollover [client datastream-index-name] @@ -494,6 +494,7 @@ :method :put :body new-mappings})] (when-not (= current-mappings (datastream-mappings client datastream-index-name)) + ;; if there was a change in the mappings do a rollover (datastream-rollover client datastream-index-name)) (if acknowledged (log/info datastream-index-name "datastream updated") From 792952ec709bd14f7c4a43bfb69bfcea8a098d94 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Fri, 26 Apr 2024 10:04:43 +0200 Subject: [PATCH 16/28] Unit test updated mappings on timeseries update --- .../resources/timeseries_lifecycle_test.clj | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 75fca3df3..0ab508209 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -3,7 +3,7 @@ [clojure.data.json :as json] [clojure.test :refer [deftest is testing use-fixtures]] [peridot.core :refer [content-type header request session]] - [qbits.spandex :as spandex] + [sixsq.nuvla.db.es.binding :as es-binding] [ring.util.codec :as rc] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.app.params :as p] @@ -282,11 +282,14 @@ (ltu/body->edn) (ltu/is-status 200)) - (testing "check that the write index mapping has been updated with the new metric" - (let [client (ltu/es-client) - ts-index-meta (spandex/request client {:url [ts-index], :method :get})] - (is (some? (-> ts-index-meta :body first second :mappings :properties - (get (keyword metric3))))))) + (testing "check that the timestream mapping has been updated with the new metric" + (let [es-client (ltu/es-client)] + (prn (try (es-binding/datastream-mappings es-client ts-index) + (catch Exception ex (prn ex)))) + (is (= {:time_series_metric "gauge" + :type "double"} + (-> (es-binding/datastream-mappings es-client ts-index) + (get (keyword metric3))))))) (testing "insert datapoint with updated schema" (let [datapoint {:timestamp (time/now-str) From 3477f6817be56c7c54a571bea7d2b43e79c966bb Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Fri, 26 Apr 2024 10:05:09 +0200 Subject: [PATCH 17/28] Unit test updated mappings on timeseries update --- .../nuvla/server/resources/timeseries_lifecycle_test.clj | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 0ab508209..65ae2d383 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -283,9 +283,7 @@ (ltu/is-status 200)) (testing "check that the timestream mapping has been updated with the new metric" - (let [es-client (ltu/es-client)] - (prn (try (es-binding/datastream-mappings es-client ts-index) - (catch Exception ex (prn ex)))) + (let [es-client (ltu/es-client)] (is (= {:time_series_metric "gauge" :type "double"} (-> (es-binding/datastream-mappings es-client ts-index) From d5bd9b0f79ebbedc4edd33d46a9cd0767e610d53 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 29 Apr 2024 08:40:20 +0200 Subject: [PATCH 18/28] Defer timeseries creation to first data insertion. Refactor error handling in ES binding --- code/src/sixsq/nuvla/db/es/log.clj | 1 + 1 file changed, 1 insertion(+) create mode 100644 code/src/sixsq/nuvla/db/es/log.clj diff --git a/code/src/sixsq/nuvla/db/es/log.clj b/code/src/sixsq/nuvla/db/es/log.clj new file mode 100644 index 000000000..ba6bbaea3 --- /dev/null +++ b/code/src/sixsq/nuvla/db/es/log.clj @@ -0,0 +1 @@ +(ns sixsq.nuvla.db.es.log) From 65116f73fae7e39e3a720176591ed47f6e321e06 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 29 Apr 2024 08:40:25 +0200 Subject: [PATCH 19/28] Defer timeseries creation to first data insertion. Refactor error handling in ES binding --- code/src/sixsq/nuvla/db/es/binding.clj | 317 ++++++++---------- code/src/sixsq/nuvla/db/es/log.clj | 28 +- .../resources/timeseries_lifecycle_test.clj | 8 +- 3 files changed, 176 insertions(+), 177 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 7813c0389..9165c0c2e 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -10,6 +10,7 @@ [sixsq.nuvla.db.es.common.es-mapping :as mapping] [sixsq.nuvla.db.es.common.utils :as escu] [sixsq.nuvla.db.es.filter :as filter] + [sixsq.nuvla.db.es.log :as es-logu] [sixsq.nuvla.db.es.order :as order] [sixsq.nuvla.db.es.pagination :as paging] [sixsq.nuvla.db.es.script-utils :refer [get-update-script]] @@ -19,6 +20,25 @@ [sixsq.nuvla.server.util.response :as r]) (:import (java.io Closeable))) +(defn spandex-request-plain + "Run a spandex request and checks that the response code is among the expected ones: + if it is not, the response is traced in the logs and a generic exception is returned to the caller. + Does not catch exceptions." + [client request expected-status-set] + (let [{:keys [status body] :as response} (spandex/request client request)] + (if (contains? expected-status-set status) + response + (es-logu/log-and-throw-unexpected-es-status (pr-str body) status expected-status-set)))) + +(defn spandex-request + "Run a spandex request and checks that the response code is among the expected ones: + if it is not, the response is traced in the logs and a generic exception is thrown to the caller. + When a Spandex exception occurs, it is also traced in the logs and a generic exception is thrown to the caller." + [client request expected-status-set] + (try (spandex-request-plain client request expected-status-set) + (catch Exception e + (es-logu/log-and-throw-unexpected-es-ex e)))) + (defn create-index [client index] (try @@ -221,18 +241,12 @@ (defn query-data-native [client index query] - (try - (let [response (spandex/request client {:url [index :_search] - :method :post - :body query})] - (if (shards-successful? response) - (:body response) - (let [msg (str "error when querying: " (:body response))] - (throw (r/ex-response msg 500))))) - (catch Exception e - (let [{:keys [body] :as _response} (ex-data e) - error (:error body) - msg (str "unexpected exception querying: " (or error e))] + (let [response (spandex-request client {:url [index :_search] + :method :post + :body query} #{200})] + (if (shards-successful? response) + (:body response) + (let [msg (str "error when querying: " (:body response))] (throw (r/ex-response msg 500)))))) (defn add-timeseries-datapoint @@ -243,10 +257,11 @@ (let [updated-data (-> data (dissoc :timestamp) (assoc "@timestamp" (:timestamp data))) - response (spandex/request client {:url [index :_doc] - :query-string {:refresh refresh} - :method :post - :body updated-data}) + response (spandex-request-plain client {:url [index :_doc] + :query-string {:refresh refresh} + :method :post + :body updated-data} + #{201}) success? (shards-successful? response)] (if success? {:status 201 @@ -254,38 +269,34 @@ :message (str index " metric added")}} (r/response-conflict index))) (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] + (let [{:keys [status] :as _response} (ex-data e)] (if (= 409 status) (r/response-conflict index) - (r/response-error (str "unexpected exception: " (or error e)))))))) + (es-logu/log-and-throw-unexpected-es-ex e)))))) (defn bulk-insert-timeseries-datapoints [client index data _options] - (try - (let [data-transform (fn [{:keys [timestamp] :as doc}] - (-> doc - (dissoc :timestamp) - (assoc "@timestamp" timestamp))) - body (spandex/chunks->body (interleave (repeat {:create {}}) - (map data-transform data))) - response (spandex/request client {:url [index :_bulk] - :method :put - :headers {"Content-Type" "application/x-ndjson"} - :body body}) - body-response (:body response) - success? (not (errors? response))] - (if success? - body-response - (let [items (:items body-response) - msg (str (if (seq items) - {:errors-count (count items) - :first-error (first items)} - body-response))] - (throw (r/ex-response msg 400))))) - (catch Exception e - (let [{:keys [body status]} (ex-data e)] - (throw (r/ex-response (str body) (or status 500))))))) + (let [data-transform (fn [{:keys [timestamp] :as doc}] + (-> doc + (dissoc :timestamp) + (assoc "@timestamp" timestamp))) + body (spandex/chunks->body (interleave (repeat {:create {}}) + (map data-transform data))) + response (spandex-request client {:url [index :_bulk] + :method :put + :headers {"Content-Type" "application/x-ndjson"} + :body body} + #{200}) + body-response (:body response) + success? (not (errors? response))] + (if success? + body-response + (let [items (:items body-response) + msg (str (if (seq items) + {:errors-count (count items) + :first-error (first items)} + body-response))] + (es-logu/throw-bad-request-ex msg))))) (defn bulk-edit-data [client collection-id @@ -358,141 +369,111 @@ (defn create-or-update-lifecycle-policy [client index ilm-policy] (let [policy-name (str index "-ilm-policy")] - (try - (let [{:keys [status]} - (spandex/request - client - {:url [:_ilm :policy policy-name] - :method :put - :body {:policy - {:_meta {:description (str "ILM policy for " index)} - :phases ilm-policy}}})] - (if (= 200 status) - (do (log/debug policy-name "ILM policy created/updated") - policy-name) - (log/error "unexpected status code when creating/updating" policy-name "ILM policy (" status ")"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when creating/updating" policy-name "ILM policy (" status "). " (or error e))))))) + (spandex-request + client + {:url [:_ilm :policy policy-name] + :method :put + :body {:policy + {:_meta {:description (str "ILM policy for " index)} + :phases ilm-policy}}} + #{200}) + (log/debug policy-name "ILM policy created/updated") + policy-name)) (defn delete-lifecycle-policy [client index] (let [policy-name (str index "-ilm-policy")] - (try - (let [{:keys [status]} - (spandex/request - client - {:url [:_ilm :policy policy-name] - :method :delete})] - (if (= 200 status) - (do (log/debug policy-name "ILM policy deleted") - policy-name) - (log/error "unexpected status code when deleting" policy-name "ILM policy (" status ")"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when deleting" policy-name "ILM policy (" status "). " (or error e))))))) + (spandex-request + client + {:url [:_ilm :policy policy-name] + :method :delete} + #{200}) + (log/debug policy-name "ILM policy deleted") + policy-name)) (defn create-or-update-timeseries-template [client index mappings {:keys [routing-path look-back-time look-ahead-time start-time lifecycle-name]}] (let [template-name (str index "-template")] - (try - (let [{:keys [status]} (spandex/request client - {:url [:_index_template template-name], - :method :put - :body {:index_patterns [(str index "*")], - :data_stream {}, - :template - {:settings - (cond-> - {:index.mode "time_series", - :number_of_shards 3 - ;:index.look_back_time "7d", - ;:index.look_ahead_time "2h", - ;:index.time_series.start_time "2023-01-01T00:00:00.000Z" - ;:index.lifecycle.name "nuvlabox-status-ts-1d-hf-ilm-policy" - } - routing-path (assoc :index.routing_path routing-path) - look-ahead-time (assoc :index.look_ahead_time look-ahead-time) - look-back-time (assoc :index.look_back_time look-back-time) - start-time (assoc :index.time_series.start_time start-time) - lifecycle-name (assoc :index.lifecycle.name lifecycle-name)) - :mappings mappings}}})] - (if (= 200 status) - (do (log/debug template-name "index template created/updated") - template-name) - (log/error "unexpected status code when creating/updating" template-name "index template (" status ")"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when creating/updating" template-name "index template (" status "). " (or error e))))))) + (spandex-request client + {:url [:_index_template template-name], + :method :put + :body {:index_patterns [(str index "*")], + :data_stream {}, + :template + {:settings + (cond-> + {:index.mode "time_series", + :number_of_shards 3 + ;:index.look_back_time "7d", + ;:index.look_ahead_time "2h", + ;:index.time_series.start_time "2023-01-01T00:00:00.000Z" + ;:index.lifecycle.name "nuvlabox-status-ts-1d-hf-ilm-policy" + } + routing-path (assoc :index.routing_path routing-path) + look-ahead-time (assoc :index.look_ahead_time look-ahead-time) + look-back-time (assoc :index.look_back_time look-back-time) + start-time (assoc :index.time_series.start_time start-time) + lifecycle-name (assoc :index.lifecycle.name lifecycle-name)) + :mappings mappings}}} + #{200}) + (log/debug template-name "index template created/updated") + template-name)) (defn delete-timeseries-template [client index] (let [template-name (str index "-template")] - (try - (let [{:keys [status]} (spandex/request client - {:url [:_index_template template-name], - :method :delete})] - (if (= 200 status) - (do (log/debug template-name "index template deleted") - template-name) - (log/error "unexpected status code when deleting" template-name "index template (" status ")"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when deleting" template-name "index template (" status "). " (or error e))))))) - -(defn create-datastream + (spandex-request client + {:url [:_index_template template-name], + :method :delete} + #{200}) + (log/debug template-name "index template deleted") + template-name)) + +(defn retrieve-datastream [client datastream-index-name] (try - (let [{:keys [status]} (spandex/request client {:url [:_data_stream datastream-index-name], :method :get})] - (if (= 200 status) - (log/debug datastream-index-name "datastream already exists") - (log/error "unexpected status code when checking" datastream-index-name "datastream (" status ")"))) + (let [{:keys [body]} (spandex-request-plain client {:url [:_data_stream datastream-index-name] :method :get} + #{200})] + (->> body :data_streams first)) (catch Exception e - (let [{:keys [status body]} (ex-data e)] - (try - (if (= 404 status) - (let [{{:keys [acknowledged]} :body} - (spandex/request client {:url [:_data_stream datastream-index-name], :method :put})] - (if acknowledged - (log/info datastream-index-name "datastream created") - (log/warn datastream-index-name "datastream may or may not have been created"))) - (log/error "unexpected status code when checking" datastream-index-name "datastream (" status "). " body)) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when creating" datastream-index-name "datastream (" status "). " (or error e))))))))) + (let [{:keys [status] :as _response} (ex-data e)] + (when (not= 404 status) + (es-logu/log-and-throw-unexpected-es-ex e)))))) + +(defn create-datastream + [client datastream-index-name] + (if (some? (retrieve-datastream client datastream-index-name)) + (es-logu/throw-conflict-ex datastream-index-name) + (let [{{:keys [acknowledged]} :body} + (spandex-request client {:url [:_data_stream datastream-index-name], :method :put} #{200})] + (if acknowledged + (log/info datastream-index-name "datastream created") + (log/warn datastream-index-name "datastream may or may not have been created"))))) (defn datastream-mappings [client datastream-index-name] - (->> (spandex/request client {:url [datastream-index-name :_mapping], :method :get}) + (->> (spandex-request client {:url [datastream-index-name :_mapping], :method :get} #{200}) :body seq (sort-by first) last second :mappings :properties)) (defn datastream-rollover [client datastream-index-name] - (try - (let [{{:keys [acknowledged]} :body} - (spandex/request client {:url [datastream-index-name :_rollover] - :method :post})] - (if acknowledged - (log/info datastream-index-name "rollover executed successfully") - (log/warn datastream-index-name "rollover may or may not have executed"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when executing datastream rollover operation" datastream-index-name "datastream (" status "). " (or error e)))))) + (let [{{:keys [acknowledged]} :body} + (spandex-request client {:url [datastream-index-name :_rollover] + :method :post} + #{200})] + (if acknowledged + (log/info datastream-index-name "rollover executed successfully") + (log/warn datastream-index-name "rollover may or may not have executed")))) (defn edit-datastream [client datastream-index-name new-mappings] (let [current-mappings (datastream-mappings client datastream-index-name) {{:keys [acknowledged]} :body} - (spandex/request client {:url [datastream-index-name :_mapping] + (spandex-request client {:url [datastream-index-name :_mapping] :query-string {:write_index_only true} :method :put - :body new-mappings})] + :body new-mappings} + #{200})] (when-not (= current-mappings (datastream-mappings client datastream-index-name)) ;; if there was a change in the mappings do a rollover (datastream-rollover client datastream-index-name)) @@ -502,20 +483,13 @@ (defn delete-datastream [client datastream-index-name] - (try - (let [{:keys [status]} (spandex/request client {:url [:_data_stream datastream-index-name] - :method :delete})] - (if (= 200 status) - (log/debug datastream-index-name "datastream deleted") - (log/error "unexpected status code when deleting" datastream-index-name "datastream (" status ")"))) - (catch Exception e - (let [{:keys [status body] :as _response} (ex-data e) - error (:error body)] - (log/error "unexpected status code when deleting" datastream-index-name "datastream (" status "). " (or error e)))))) + (spandex-request client {:url [:_data_stream datastream-index-name], :method :delete} #{200}) + (log/debug datastream-index-name "datastream deleted")) (defn create-timeseries-impl [client timeseries-id - {:keys [mappings + {:keys [create-datastream? + mappings routing-path ilm-policy look-back-time @@ -530,22 +504,14 @@ :lifecycle-name ilm-policy-name :look-ahead-time look-ahead-time :look-back-time look-back-time - :start-time start-time}) + :start-time start-time})) + (when create-datastream? (create-datastream client timeseries-id))) (defn retrieve-timeseries-impl [client timeseries-id] - (try - (let [response (spandex/request client {:url [:_data_stream timeseries-id], :method :get}) - found? (seq (get-in response [:body :data_streams]))] - (if found? - (:body response) - (throw (r/ex-not-found timeseries-id)))) - (catch Exception e - (let [{:keys [status] :as _response} (ex-data e)] - (if (= 404 status) - (throw (r/ex-not-found timeseries-id)) - (throw e)))))) + (or (retrieve-datastream client timeseries-id) + (throw (r/ex-not-found timeseries-id)))) (defn edit-timeseries-impl [client timeseries-id @@ -566,11 +532,13 @@ :look-ahead-time look-ahead-time :look-back-time look-back-time :start-time start-time})) - (edit-datastream client timeseries-id mappings)) + (when (some? (retrieve-datastream client timeseries-id)) + (edit-datastream client timeseries-id mappings))) (defn delete-timeseries-impl [client timeseries-id _options] - (delete-datastream client timeseries-id) + (when (some? (retrieve-datastream client timeseries-id)) + (delete-datastream client timeseries-id)) (delete-timeseries-template client timeseries-id) (delete-lifecycle-policy client timeseries-id)) @@ -581,6 +549,7 @@ routing-path (mapping/time-series-routing-path spec)] (create-timeseries-impl client timeseries-id (assoc options + :create-datastream? true :mappings mappings :routing-path routing-path)))) diff --git a/code/src/sixsq/nuvla/db/es/log.clj b/code/src/sixsq/nuvla/db/es/log.clj index ba6bbaea3..95324683a 100644 --- a/code/src/sixsq/nuvla/db/es/log.clj +++ b/code/src/sixsq/nuvla/db/es/log.clj @@ -1 +1,27 @@ -(ns sixsq.nuvla.db.es.log) +(ns sixsq.nuvla.db.es.log + (:require [clojure.tools.logging :as log] + [sixsq.nuvla.server.util.response :as r])) + +(defn throw-bad-request-ex + [msg] + (throw (r/ex-bad-request msg))) + +(defn throw-conflict-ex + [id] + (throw (r/ex-conflict id))) + +(defn log-and-throw-unexpected-es-status + ([status expected-status-set] + (log-and-throw-unexpected-es-status "unexpected status code" status expected-status-set)) + ([msg status expected-status-set] + (log/error (str "unexpected status " status ". One of " expected-status-set " was expected.") msg) + (throw (r/ex-response msg 500)))) + +(defn log-and-throw-unexpected-es-ex + ([ex] + (log-and-throw-unexpected-es-ex "unexpected exception" ex)) + ([msg ex] + (let [{:keys [status body]} (ex-data ex) + error (:error body)] + (log/error msg {:status status} (or error ex)) + (throw (r/ex-response msg 500))))) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 65ae2d383..a15c80858 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -77,14 +77,15 @@ (ltu/is-operation-present tu/action-insert)) ts-resource (ltu/body ts-response) ts-index (tu/resource-id->timeseries-index ts-id) - ts (db/retrieve-timeseries ts-index) insert-op-url (ltu/get-op-url ts-response tu/action-insert) now (time/now)] (is (= (assoc valid-entry :id ts-id :resource-type "timeseries") (select-keys ts-resource [:resource-type :id :dimensions :metrics :queries]))) - (is (pos? (count (:data_streams ts)))) + + (testing "No timeseries is created yet" + (is (thrown? Exception (db/retrieve-timeseries ts-index)))) (testing "invalid timeseries creation attempts" (-> session-user @@ -161,6 +162,9 @@ (ltu/body->edn) (ltu/is-status 201))) + (testing "timeseries is now created" + (is (some? (db/retrieve-timeseries ts-index)))) + (testing "insert same datapoint again -> conflict" (-> session-user (request insert-op-url From 97a9e6531844208094881eafef8bfdb67e5885ca Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 29 Apr 2024 11:56:59 +0200 Subject: [PATCH 20/28] crud/action timeseries/uuid data query should return 404 if datastream is not yet created --- .../resources/timeseries/data_utils.clj | 9 ++++++ .../resources/timeseries_lifecycle_test.clj | 32 ++++++++++++------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index b22d8e30d..25cc26f10 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -5,6 +5,7 @@ [ring.middleware.accept :refer [wrap-accept]] [sixsq.nuvla.auth.acl-resource :as a] [sixsq.nuvla.db.filter.parser :as parser] + [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.resources.common.crud :as crud] [sixsq.nuvla.server.resources.timeseries.utils :as utils] [sixsq.nuvla.server.util.log :as logu] @@ -422,6 +423,13 @@ 400))))) params) +(defn throw-timeseries-not-created-yet + [{:keys [timeseries-index] :as params}] + (when-not (db/retrieve-timeseries timeseries-index) + (throw (r/ex-response "timeseries not created yet. Insert some data prior to querying the timeseries" + 404))) + params) + (defn generic-ts-query-data [params request] (-> params @@ -429,6 +437,7 @@ (assoc-query-specs) (assoc-dimensions-filters) (throw-invalid-dimensions) + (throw-timeseries-not-created-yet) (query-data request))) (defn wrapped-query-data diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index a15c80858..dcfa67c6a 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -366,17 +366,6 @@ metric1 20.0 metric2 2}]] - (testing "successful bulk insert" - (-> session-user - (request bulk-insert-op-url - :headers {"bulk" true} - :request-method :post - :body (json/write-str datapoints)) - (ltu/body->edn) - (ltu/is-status 200))) - - (ltu/refresh-es-indices) - (testing "Query metrics" (let [midnight-today (time/truncated-to-days now) midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) @@ -392,6 +381,27 @@ :to (if to (time/to-str to) to-str)} dimensions-filters (assoc :dimension-filter dimensions-filters) granularity (assoc :granularity granularity))))))] + + (testing "query before first insert return 404" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now] + (-> (metrics-request {:queries [query1] + :from from + :to to + :granularity "1-days"}) + (ltu/is-status 404)))) + + (testing "successful bulk insert" + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str datapoints)) + (ltu/body->edn) + (ltu/is-status 200))) + + (ltu/refresh-es-indices) + (testing "basic query" (let [from (time/minus now (time/duration-unit 1 :days)) to now From 9759b1cbf79d08cbc854ea9b660ffcd4406af180 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 29 Apr 2024 13:24:29 +0200 Subject: [PATCH 21/28] Make /data endpoint accept query parameter --- .../nuvla/server/resources/spec/nuvlabox.cljc | 8 +- .../nuvlabox_status_2_lifecycle_test.clj | 114 +++++++++--------- 2 files changed, 62 insertions(+), 60 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/spec/nuvlabox.cljc b/code/src/sixsq/nuvla/server/resources/spec/nuvlabox.cljc index 1888e7dfa..808ca523c 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/nuvlabox.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/nuvlabox.cljc @@ -98,15 +98,17 @@ ;; actions (s/def ::dataset (s/coll-of ::core/nonblank-string)) +(s/def ::query (s/coll-of ::core/nonblank-string)) (s/def ::filter (st/spec ::core/nonblank-string)) (s/def ::from (st/spec ::core/timestamp)) (s/def ::to (st/spec ::core/timestamp)) (s/def ::granularity (st/spec (s/or :raw #{"raw"} :granularity-duration data-utils/granularity->duration))) (s/def ::custom-es-aggregations any?) -(s/def ::bulk-data-body (su/only-keys-maps {:req-un [::dataset - ::from +(s/def ::bulk-data-body (su/only-keys-maps {:req-un [::from ::to] - :opt-un [::filter + :opt-un [::dataset + ::query + ::filter ::granularity ::custom-es-aggregations]})) diff --git a/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj index 4e15bea51..a76968055 100644 --- a/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/nuvlabox_status_2_lifecycle_test.clj @@ -477,7 +477,7 @@ (let [invalid-format (fn [accept-header] (-> (metrics-request {:accept-header accept-header - :datasets ["cpu-stats"] + :queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"}) @@ -489,7 +489,7 @@ (let [metrics-request (fn [accept-header response-content-type] (-> (metrics-request (cond-> - {:datasets ["cpu-stats"] + {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"} @@ -585,22 +585,22 @@ now (time/now) midnight-today (time/truncated-to-days now) midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) - metrics-request (fn [{:keys [datasets from from-str to to-str granularity custom-es-aggregations accept-header] #_:or #_{accept-header "application/json"}}] + metrics-request (fn [{:keys [queries from from-str to to-str granularity custom-es-aggregations accept-header] #_:or #_{accept-header "application/json"}}] (-> session-nb (content-type "application/x-www-form-urlencoded") (cond-> accept-header (header "accept" accept-header)) (request nuvlabox-data-url :body (rc/form-encode (cond-> - {:dataset datasets - :from (if from (time/to-str from) from-str) - :to (if to (time/to-str to) to-str)} + {:query queries + :from (if from (time/to-str from) from-str) + :to (if to (time/to-str to) to-str)} granularity (assoc :granularity granularity) custom-es-aggregations (assoc :custom-es-aggregations custom-es-aggregations))))))] (testing "new metrics data is added to ts-nuvlaedge time-serie" (let [from (time/minus (time/now) (time/duration-unit 1 :days)) to now - metric-data (-> (metrics-request {:datasets ["cpu-stats" + metric-data (-> (metrics-request {:queries ["cpu-stats" "ram-stats" "disk-stats" "network-stats" @@ -700,7 +700,7 @@ (testing "raw metric data query" (let [from (time/minus (time/now) (time/duration-unit 1 :days)) to now - raw-metric-data (-> (metrics-request {:datasets ["cpu-stats" + raw-metric-data (-> (metrics-request {:queries ["cpu-stats" "ram-stats" "disk-stats" "network-stats" @@ -767,7 +767,7 @@ to now] (testing "custom aggregation on cpu-stats" (let [custom-cpu-agg (-> (metrics-request - {:datasets ["cpu-stats"] + {:queries ["cpu-stats"] :from from :to to :custom-es-aggregations (json/write-str @@ -790,7 +790,7 @@ (:cpu-stats custom-cpu-agg))))) (testing "custom aggregation on disk-stats" (let [custom-cpu-agg (-> (metrics-request - {:datasets ["disk-stats"] + {:queries ["disk-stats"] :from from :to to :custom-es-aggregations (json/write-str @@ -821,43 +821,43 @@ :message))] (is (= "exactly one query must be specified with accept header 'text/csv'" (invalid-request {:accept-header "text/csv" - :datasets ["cpu-stats" "network-stats"] + :queries ["cpu-stats" "network-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"}))) (is (= "from parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :granularity "1-days"}))) (is (= "from parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from-str "wrong-datetime" :granularity "1-days"}))) (is (= "to parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :granularity "1-days"}))) (is (= "to parameter is mandatory, with format iso8601 (uuuu-MM-dd'T'HH:mm:ss[.SSS]Z)" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to-str "wrong-datetime" :granularity "1-days"}))) (is (= "from must be before to" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from now :to now :granularity "1-days"}))) (is (= "unknown queries: invalid-1,invalid-2" - (invalid-request {:datasets ["invalid-1" "cpu-stats" "invalid-2"] + (invalid-request {:queries ["invalid-1" "cpu-stats" "invalid-2"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"}))) (is (= "unrecognized value for granularity 1-invalid" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-invalid"}))) (is (= "too many data points requested. Please restrict the time interval or increase the time granularity." - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-minutes"}))))) @@ -865,9 +865,9 @@ (testing "csv export of metrics data" (let [from (time/minus now (time/duration-unit 1 :days)) to now - csv-request (fn [dataset granularity] + csv-request (fn [query granularity] (-> (metrics-request {:accept-header "text/csv" - :datasets [dataset] + :queries [query] :from from :to to :granularity granularity}) @@ -959,7 +959,7 @@ (testing "Export with custom es aggregations not allowed" (let [csv-custom-cpu-agg (-> (metrics-request {:accept-header "text/csv" - :datasets ["cpu-stats"] + :queries ["cpu-stats"] :from from :to to :custom-es-aggregations (json/write-str @@ -1017,7 +1017,7 @@ now (time/now) midnight-today (time/truncated-to-days now) midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) - metrics-request (fn [{:keys [datasets from from-str to to-str granularity custom-es-aggregations accept-header]}] + metrics-request (fn [{:keys [queries from from-str to to-str granularity custom-es-aggregations accept-header]}] (-> session-nb (cond-> accept-header (header "accept" accept-header)) (request nuvlabox-data-url @@ -1025,20 +1025,20 @@ :headers {:bulk true} :body (json/write-str (cond-> - {:filter (str "(id='" nuvlabox-id "'" - " or id='" nuvlabox-id-2 "'" - " or id='" nuvlabox-id-3 "'" - " or id='" nuvlabox-id-4 "')") - :dataset datasets - :from (if from (time/to-str from) from-str) - :to (if to (time/to-str to) to-str)} + {:filter (str "(id='" nuvlabox-id "'" + " or id='" nuvlabox-id-2 "'" + " or id='" nuvlabox-id-3 "'" + " or id='" nuvlabox-id-4 "')") + :query queries + :from (if from (time/to-str from) from-str) + :to (if to (time/to-str to) to-str)} granularity (assoc :granularity granularity) custom-es-aggregations (assoc :custom-es-aggregations custom-es-aggregations))))))] (testing "new metrics data is added to ts-nuvlaedge time-serie" (ltu/refresh-es-indices) (let [from (time/minus (time/now) (time/duration-unit 1 :days)) to now - metric-data (-> (metrics-request {:datasets ["cpu-stats" + metric-data (-> (metrics-request {:queries ["cpu-stats" "ram-stats" "disk-stats" "network-stats" @@ -1116,7 +1116,7 @@ (ltu/refresh-es-indices) (let [from (time/minus (time/now) (time/duration-unit 1 :days)) to now - raw-metric-data (-> (metrics-request {:datasets ["cpu-stats" + raw-metric-data (-> (metrics-request {:queries ["cpu-stats" "ram-stats" "disk-stats" "network-stats" @@ -1223,7 +1223,7 @@ to now] (testing "custom aggregation on cpu-stats" (let [custom-cpu-agg (-> (metrics-request - {:datasets ["cpu-stats"] + {:queries ["cpu-stats"] :from from :to to :custom-es-aggregations {:agg1 {:date_histogram @@ -1245,7 +1245,7 @@ (:cpu-stats custom-cpu-agg))))) (testing "custom aggregation on disk-stats" (let [custom-disk-agg (-> (metrics-request - {:datasets ["disk-stats"] + {:queries ["disk-stats"] :from from :to to :custom-es-aggregations {:agg1 {:date_histogram @@ -1275,27 +1275,27 @@ :message))] (is (= "exactly one query must be specified with accept header 'text/csv'" (invalid-request {:accept-header "text/csv" - :datasets ["cpu-stats" "network-stats"] + :queries ["cpu-stats" "network-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"}))) (is (= "from must be before to" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from now :to now :granularity "1-days"}))) (is (= "unknown queries: invalid-1,invalid-2" - (invalid-request {:datasets ["invalid-1" "cpu-stats" "invalid-2"] + (invalid-request {:queries ["invalid-1" "cpu-stats" "invalid-2"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-days"}))) (is (= "unrecognized value for granularity 1-invalid" - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-invalid"}))) (is (= "too many data points requested. Please restrict the time interval or increase the time granularity." - (invalid-request {:datasets ["cpu-stats"] + (invalid-request {:queries ["cpu-stats"] :from (time/minus now (time/duration-unit 1 :days)) :to now :granularity "1-minutes"}))))) @@ -1303,9 +1303,9 @@ (testing "csv export of metrics data" (let [from (time/minus now (time/duration-unit 1 :days)) to now - csv-request (fn [dataset granularity] + csv-request (fn [query granularity] (-> (metrics-request {:accept-header "text/csv" - :datasets [dataset] + :queries [query] :from from :to to :granularity granularity}) @@ -1572,20 +1572,20 @@ (testing "availability data on a single nuvlabox" (let [nuvlabox-data-url (str nuvlabox-url "/data") - metrics-request (fn [{:keys [datasets from from-str to to-str granularity accept-header] #_:or #_{accept-header "application/json"}}] + metrics-request (fn [{:keys [queries from from-str to to-str granularity accept-header] #_:or #_{accept-header "application/json"}}] (-> session-nb (content-type "application/x-www-form-urlencoded") (cond-> accept-header (header "accept" accept-header)) (request nuvlabox-data-url :body (rc/form-encode - {:dataset datasets + {:query queries :from (if from (time/to-str from) from-str) :to (if to (time/to-str to) to-str) :granularity granularity}))))] (testing "from midnight yesterday until now" (let [from midnight-yesterday to now - metric-data (-> (metrics-request {:datasets ["availability-stats"] + metric-data (-> (metrics-request {:queries ["availability-stats"] :from from :to to :granularity "1-days"}) @@ -1618,7 +1618,7 @@ (testing "raw availability data query" (let [from (time/minus now (time/duration-unit 1 :days)) to now - raw-availability-data (-> (metrics-request {:datasets ["availability-stats"] + raw-availability-data (-> (metrics-request {:queries ["availability-stats"] :from from :to to :granularity "raw"}) @@ -1637,9 +1637,9 @@ (testing "csv export of availability data" (let [from midnight-yesterday to now - csv-request (fn [dataset granularity] + csv-request (fn [query granularity] (-> (metrics-request {:accept-header "text/csv" - :datasets [dataset] + :queries [query] :from from :to to :granularity granularity}) @@ -1691,7 +1691,7 @@ nuvlabox-data-url (str p/service-context nb/resource-type "/data") midnight-today (time/truncated-to-days now) midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) - metrics-request (fn [{:keys [datasets from from-str to to-str granularity accept-header]}] + metrics-request (fn [{:keys [queries from from-str to to-str granularity accept-header]}] (-> session-nb (cond-> accept-header (header "accept" accept-header)) (request nuvlabox-data-url @@ -1702,7 +1702,7 @@ " or id='" nuvlabox-id-3 "'" " or id='" nuvlabox-id-4 "'" " or id='" nuvlabox-id-5 "')") - :dataset datasets + :query queries :from (if from (time/to-str from) from-str) :to (if to (time/to-str to) to-str) :granularity granularity}))))] @@ -1710,7 +1710,7 @@ (ltu/refresh-es-indices) (let [from midnight-yesterday to now - metric-data (-> (metrics-request {:datasets ["availability-stats" + metric-data (-> (metrics-request {:queries ["availability-stats" "availability-by-edge"] :from from :to to @@ -1775,7 +1775,7 @@ (testing "raw availability data query" (let [from midnight-yesterday to now - raw-availability-data (-> (metrics-request {:datasets ["availability-stats"] + raw-availability-data (-> (metrics-request {:queries ["availability-stats"] :from from :to to :granularity "raw"}) @@ -1797,9 +1797,9 @@ (:availability-stats raw-availability-data))))) (testing "csv export of availability data" - (let [csv-request (fn [dataset granularity] + (let [csv-request (fn [query granularity] (-> (metrics-request {:accept-header "text/csv" - :datasets [dataset] + :queries [query] :from midnight-yesterday :to now :granularity granularity}) @@ -1855,14 +1855,14 @@ (let [nuvlabox-data-url (str p/service-context nb/resource-type "/data") midnight-today (time/truncated-to-days now) midnight-yesterday (time/truncated-to-days (time/minus now (time/duration-unit 1 :days))) - metrics-request (fn [{:keys [datasets from from-str to to-str granularity accept-header]}] + metrics-request (fn [{:keys [queries from from-str to to-str granularity accept-header]}] (-> session-nb (cond-> accept-header (header "accept" accept-header)) (request nuvlabox-data-url :request-method :patch :headers {:bulk true} :body (json/write-str - {:dataset datasets + {:query queries :from (if from (time/to-str from) from-str) :to (if to (time/to-str to) to-str) :granularity granularity})))) @@ -1873,7 +1873,7 @@ (let [from now-1d to now] (with-redefs [data-utils/query-data-max-time 100] - (-> (metrics-request {:datasets ["availability-stats"] + (-> (metrics-request {:queries ["availability-stats"] :from from :to to :granularity "1-days"}) @@ -1884,7 +1884,7 @@ (testing "availability query performance" (let [[elapsed-time metric-data] (logt/logtime1 - (-> (metrics-request {:datasets ["availability-stats"] + (-> (metrics-request {:queries ["availability-stats"] :from from :to to :granularity "1-days"}) From e709f88234658d7451025706abb3c54f02fab358 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 2 May 2024 07:46:29 +0200 Subject: [PATCH 22/28] Timestamp query csv export --- .../server/resources/nuvlabox/data_utils.clj | 64 ++-------- .../resources/timeseries/data_utils.clj | 115 ++++++++++++++++-- .../resources/timeseries_lifecycle_test.clj | 31 ++++- 3 files changed, 143 insertions(+), 67 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj index 5c979cf2d..a9d56dad4 100644 --- a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj @@ -1,6 +1,5 @@ (ns sixsq.nuvla.server.resources.nuvlabox.data-utils (:require - [clojure.data.csv :as csv] [clojure.set :as set] [clojure.string :as str] [clojure.tools.logging :as log] @@ -18,11 +17,7 @@ [sixsq.nuvla.server.resources.ts-nuvlaedge-telemetry :as ts-nuvlaedge-telemetry] [sixsq.nuvla.server.util.log :as logu] [sixsq.nuvla.server.util.time :as time]) - (:import - (java.io StringWriter) - (java.text DecimalFormat DecimalFormatSymbols) - (java.util Locale) - (java.util.concurrent ExecutionException TimeoutException))) + (:import (java.util.concurrent ExecutionException TimeoutException))) (def running-query-data (atom 0)) (def requesting-query-data (atom 0)) @@ -590,45 +585,6 @@ [[query-opts resp]] [query-opts (update-in resp [0] dissoc :hits)]) -(defn throw-custom-aggregations-not-exportable - [{:keys [custom-es-aggregations]}] - (when custom-es-aggregations - (logu/log-and-throw-400 "Custom aggregations cannot be exported to csv format"))) - -(defn metrics-data->csv [options dimension-keys meta-keys metric-keys data-fn response] - (with-open [writer (StringWriter.)] - ;; write csv header - (csv/write-csv writer [(concat (map name dimension-keys) - (map name meta-keys) - (map name metric-keys))]) - ;; write csv data - (let [df (DecimalFormat. "0.####" (DecimalFormatSymbols. Locale/US))] - (csv/write-csv writer - (for [{:keys [dimensions ts-data]} response - data-point ts-data] - (concat (map dimensions dimension-keys) - (map data-point meta-keys) - (map (fn [metric-key] - (let [v (data-fn options data-point metric-key)] - (if (float? v) - ;; format floats with 4 decimal and dot separator - (.format df v) - v))) - metric-keys))))) - (.toString writer))) - -(defn csv-export-fn - [dimension-keys-fn meta-keys-fn metric-keys-fn data-fn] - (fn [{:keys [resps] :as options}] - (throw-custom-aggregations-not-exportable options) - (metrics-data->csv - options - (dimension-keys-fn options) - (meta-keys-fn options) - (metric-keys-fn options) - data-fn - (first resps)))) - (defn csv-dimension-keys-fn [] (fn [{:keys [raw predefined-aggregations queries query-specs mode]}] @@ -677,10 +633,10 @@ (defn availability-csv-export-fn [] - (csv-export-fn (csv-dimension-keys-fn) - (csv-meta-keys-fn) - (availability-csv-metric-keys-fn) - (availability-csv-data-fn))) + (ts-data-utils/csv-export-fn (csv-dimension-keys-fn) + (csv-meta-keys-fn) + (availability-csv-metric-keys-fn) + (availability-csv-data-fn))) (defn telemetry-csv-metric-keys-fn [metric] @@ -707,10 +663,10 @@ (defn telemetry-csv-export-fn [metric] - (csv-export-fn (csv-dimension-keys-fn) - (csv-meta-keys-fn) - (telemetry-csv-metric-keys-fn metric) - (telemetry-csv-data-fn metric))) + (ts-data-utils/csv-export-fn (csv-dimension-keys-fn) + (csv-meta-keys-fn) + (telemetry-csv-metric-keys-fn metric) + (telemetry-csv-data-fn metric))) (defn single-edge-queries [] @@ -1177,7 +1133,7 @@ Allow max 4 additional requests to wait at most 5 seconds to get access to computation." [{:keys [mode query dataset] :as params} request] - (let [query (or query dataset) + (let [query (or query dataset) queries (if (coll? query) query [query])] (if (and (= :multi-edge-query mode) (some #{"availability-stats" "availability-by-edge"} queries)) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index 25cc26f10..617fb03be 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -1,5 +1,6 @@ (ns sixsq.nuvla.server.resources.timeseries.data-utils - (:require [clojure.data.json :as json] + (:require [clojure.data.csv :as csv] + [clojure.data.json :as json] [clojure.set :as set] [clojure.string :as str] [ring.middleware.accept :refer [wrap-accept]] @@ -10,7 +11,11 @@ [sixsq.nuvla.server.resources.timeseries.utils :as utils] [sixsq.nuvla.server.util.log :as logu] [sixsq.nuvla.server.util.response :as r] - [sixsq.nuvla.server.util.time :as time])) + [sixsq.nuvla.server.util.time :as time]) + (:import + (java.io StringWriter) + (java.text DecimalFormat DecimalFormatSymbols) + (java.util Locale))) (def max-data-points 200) @@ -361,10 +366,10 @@ (crud/query-as-admin timeseries-index) (->ts-query-resp (assoc params :->resp-dimensions-fn ->resp-dimensions)))) -(defmulti ts-query->query-spec (fn [{:keys [query-type]}] query-type)) +(defmulti ts-query->query-spec (fn [_params {:keys [query-type]}] query-type)) (defmethod ts-query->query-spec :default - [{:keys [query-type]}] + [_params {:keys [query-type]}] (logu/log-and-throw-400 (str "unrecognized query type " query-type))) (defn parse-aggregations @@ -374,25 +379,111 @@ [aggregation-name {(keyword aggregation-type) {:field field-name}}])) (into {}))) +(defn throw-custom-aggregations-not-exportable + [{:keys [custom-es-aggregations]}] + (when custom-es-aggregations + (logu/log-and-throw-400 "Custom aggregations cannot be exported to csv format"))) + +(defn metrics-data->csv [options dimension-keys meta-keys metric-keys data-fn response] + (with-open [writer (StringWriter.)] + ;; write csv header + (csv/write-csv writer [(concat (map name dimension-keys) + (map name meta-keys) + (map name metric-keys))]) + ;; write csv data + (let [df (DecimalFormat. "0.####" (DecimalFormatSymbols. Locale/US))] + (csv/write-csv writer + (for [{:keys [dimensions ts-data]} response + data-point ts-data] + (concat (map dimensions dimension-keys) + (map data-point meta-keys) + (map (fn [metric-key] + (let [v (data-fn options data-point metric-key)] + (if (float? v) + ;; format floats with 4 decimal and dot separator + (.format df v) + v))) + metric-keys))))) + (.toString writer))) + +(defn csv-export-fn + [dimension-keys-fn meta-keys-fn metric-keys-fn data-fn] + (fn [{:keys [resps] :as options}] + (throw-custom-aggregations-not-exportable options) + (metrics-data->csv + options + (dimension-keys-fn options) + (meta-keys-fn options) + (metric-keys-fn options) + data-fn + (first resps)))) + +(defn csv-dimension-keys-fn + [{:keys [dimensions]} _query-spec] + (fn [{:keys [raw predefined-aggregations queries query-specs mode]}] + (cond + raw + [] + + predefined-aggregations + (let [{group-by-field :group-by} (get query-specs (first queries)) + dimension-keys (map :field-name dimensions)] + (cond-> dimension-keys + (and predefined-aggregations group-by-field) (conj group-by-field)))))) + +(defn csv-meta-keys-fn + [{:keys [dimensions]} _query-spec] + (fn [{:keys [predefined-aggregations raw]}] + (cond + raw (concat [:timestamp] (map :field-name dimensions)) + predefined-aggregations [:timestamp :doc-count]))) + +(defn csv-query-keys-fn + [{:keys [query-name]}] + (fn [{:keys [predefined-aggregations raw queries query-specs resps]}] + (let [{:keys [aggregations response-aggs]} + (get query-specs (first queries))] + (cond + raw + (sort (keys (-> resps ffirst :ts-data first (get query-name)))) + + predefined-aggregations + (or response-aggs (keys aggregations)))))) + +(defn csv-data-fn + [{:keys [query-name]}] + (fn [{:keys [predefined-aggregations raw]} + {:keys [aggregations] :as data-point} metric-key] + (cond + raw + (get-in data-point [query-name metric-key]) + + predefined-aggregations + (get-in aggregations [(keyword metric-key) :value])))) + +(defn generic-csv-export-fn + [timeseries ts-query] + (csv-export-fn (csv-dimension-keys-fn timeseries ts-query) + (csv-meta-keys-fn timeseries ts-query) + (csv-query-keys-fn ts-query) + (csv-data-fn ts-query))) + (defmethod ts-query->query-spec "standard" - [{:keys [query] :as _ts-query}] + [{:keys [timeseries]} {:keys [query] :as ts-query}] {:query-fn generic-query-fn :aggregations (some-> query :aggregations parse-aggregations) - ; :csv-export-fn (telemetry-csv-export-fn :cpu) - }) + :csv-export-fn (generic-csv-export-fn timeseries ts-query)}) (defmethod ts-query->query-spec "custom-es-query" - [{:keys [custom-es-query] :as _ts-query}] + [_params {:keys [custom-es-query] :as _ts-query}] {:query-fn generic-query-fn - :aggregations (some-> custom-es-query :aggregations) - ; :csv-export-fn (telemetry-csv-export-fn :cpu) - }) + :aggregations (some-> custom-es-query :aggregations)}) (defn assoc-query-specs [{:keys [timeseries] :as params}] (let [query-specs (-> (get timeseries :queries) (->> (group-by :query-name)) - (update-vals (comp ts-query->query-spec first)))] + (update-vals (comp (partial ts-query->query-spec params) first)))] (cond-> params query-specs (assoc :query-specs query-specs)))) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index dcfa67c6a..53d5cc22a 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -1,6 +1,7 @@ (ns sixsq.nuvla.server.resources.timeseries-lifecycle-test (:require [clojure.data.json :as json] + [clojure.string :as str] [clojure.test :refer [deftest is testing use-fixtures]] [peridot.core :refer [content-type header request session]] [sixsq.nuvla.db.es.binding :as es-binding] @@ -478,7 +479,35 @@ (is (= [{:dimensions {(keyword dimension1) "all"} :ts-data (set (map #(update-keys % keyword) datapoints))}] (-> (get metric-data (keyword query1)) - (update-in [0 :ts-data] set)))))))))) + (update-in [0 :ts-data] set)))))) + + (testing "csv export" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + csv-request (fn [query granularity] + (-> (metrics-request {:accept-header "text/csv" + :queries [query] + :from from + :to to + :granularity granularity}) + (ltu/is-status 200) + (ltu/is-header "Content-Type" "text/csv") + (ltu/is-header "Content-disposition" "attachment;filename=export.csv") + (ltu/body)))] + (testing "Basic query" + (is (= (str "test-dimension1,timestamp,doc-count,test-metric1-avg\n" + (str/join "," ["all" (time/to-str midnight-yesterday) + 0 nil]) "\n" + (str/join "," ["all" (time/to-str midnight-today) + 2 15]) "\n") + (csv-request query1 "1-days")))) + #_(testing "Export raw data" + (is (= (str "timestamp,test-dimension1,test-metric1\n" + (str/join "," [(time/to-str midnight-yesterday) + 10 5.5]) "\n") + (csv-request query1 "raw")))) + (testing "Export with custom queries not allowed" + ))))))) (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))] From 1e26f8e5233f9f615b034a82dcb4bb58682c44c6 Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 6 May 2024 07:44:05 +0200 Subject: [PATCH 23/28] Sending data outside accepted temporal range should return 4xx error instead of 500 error --- code/src/sixsq/nuvla/db/es/binding.clj | 26 +++++++------ code/src/sixsq/nuvla/db/es/log.clj | 18 +++++++-- .../nuvla/server/resources/timeseries.clj | 1 + .../server/resources/timeseries/utils.clj | 17 ++++++++- .../resources/timeseries_lifecycle_test.clj | 37 ++++++++++++++++--- 5 files changed, 78 insertions(+), 21 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 9165c0c2e..e8aee6a12 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -274,6 +274,19 @@ (r/response-conflict index) (es-logu/log-and-throw-unexpected-es-ex e)))))) +(defn process-es-response + [response] + (let [body-response (:body response) + success? (not (errors? response))] + (if success? + body-response + (let [items (:items body-response) + msg (str (if (seq items) + {:errors-count (count items) + :first-error (first items)} + body-response))] + (es-logu/log-and-throw-unexpected-es-ex msg (ex-info msg {})))))) + (defn bulk-insert-timeseries-datapoints [client index data _options] (let [data-transform (fn [{:keys [timestamp] :as doc}] @@ -286,17 +299,8 @@ :method :put :headers {"Content-Type" "application/x-ndjson"} :body body} - #{200}) - body-response (:body response) - success? (not (errors? response))] - (if success? - body-response - (let [items (:items body-response) - msg (str (if (seq items) - {:errors-count (count items) - :first-error (first items)} - body-response))] - (es-logu/throw-bad-request-ex msg))))) + #{200})] + (process-es-response response))) (defn bulk-edit-data [client collection-id diff --git a/code/src/sixsq/nuvla/db/es/log.clj b/code/src/sixsq/nuvla/db/es/log.clj index 95324683a..797389271 100644 --- a/code/src/sixsq/nuvla/db/es/log.clj +++ b/code/src/sixsq/nuvla/db/es/log.clj @@ -10,6 +10,15 @@ [id] (throw (r/ex-conflict id))) +(defn log-and-throw-bad-request-ex + ([ex] + (log-and-throw-bad-request-ex "bad request" ex)) + ([msg ex] + (let [{:keys [status body]} (ex-data ex) + error (:error body)] + (log/error msg {:status status} (or error ex)) + (throw (r/ex-bad-request msg))))) + (defn log-and-throw-unexpected-es-status ([status expected-status-set] (log-and-throw-unexpected-es-status "unexpected status code" status expected-status-set)) @@ -19,9 +28,12 @@ (defn log-and-throw-unexpected-es-ex ([ex] - (log-and-throw-unexpected-es-ex "unexpected exception" ex)) + (log-and-throw-unexpected-es-ex "unexpected error" ex)) ([msg ex] + (log-and-throw-unexpected-es-ex msg "unexpected error" ex)) + ([internal-msg external-msg ex] (let [{:keys [status body]} (ex-data ex) error (:error body)] - (log/error msg {:status status} (or error ex)) - (throw (r/ex-response msg 500))))) + (log/error internal-msg {:status status} (or error ex)) + (throw (r/ex-response external-msg 500))))) + diff --git a/code/src/sixsq/nuvla/server/resources/timeseries.clj b/code/src/sixsq/nuvla/server/resources/timeseries.clj index 87cd4ade3..2a7a2f192 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries.clj @@ -166,3 +166,4 @@ The `timeseries` resources represent a timeseries. (defn initialize [] (std-crud/initialize resource-type ::timeseries/schema)) + diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj index eee92887f..01b96b3c3 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -134,13 +134,28 @@ (str "unexpected keys: " (str/join "," extra-keys)) 400))))) +(defn throw-outside-acceptable-time-range + [_timeseries {:keys [timestamp] :as datapoint}] + (let [ts (time/parse-date timestamp) + now (time/now) + look-ahead-time (time/duration-unit 2 :hours) + look-back-time (time/duration-unit 7 :days) + start-time (time/minus now look-back-time) + end-time (time/plus now look-ahead-time)] + (if (and (time/before? start-time ts) (time/before? ts end-time)) + datapoint + (throw (r/ex-response + (str "timestamp is outside acceptable range: " ts " not in [" start-time " - " end-time "]") + 400))))) + (defn validate-datapoint [timeseries datapoint] (->> datapoint (throw-missing-dimensions timeseries) (throw-missing-mandatory-metrics timeseries) (throw-wrong-types timeseries) - (throw-extra-keys timeseries))) + (throw-extra-keys timeseries) + (throw-outside-acceptable-time-range timeseries))) (defn validate-datapoints [timeseries datapoints] diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 53d5cc22a..675f4f694 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -209,6 +209,31 @@ (ltu/is-status 400) (ltu/is-key-value :message "Bulk request should contain bulk http header."))) + (testing "Sending data outside insert window should throw 400 error" + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str + (map (fn [entry] + (assoc entry :timestamp + (time/to-str (time/minus now (time/duration-unit 2 :weeks))))) + datapoints))) + (ltu/body->edn) + (ltu/is-status 400)) + + (-> session-user + (request bulk-insert-op-url + :headers {"bulk" true} + :request-method :post + :body (json/write-str + (map (fn [entry] + (assoc entry :timestamp + (time/to-str (time/plus now (time/duration-unit 4 :hours))))) + datapoints))) + (ltu/body->edn) + (ltu/is-status 400))) + (testing "successful bulk insert" (-> session-user (request bulk-insert-op-url @@ -384,8 +409,8 @@ granularity (assoc :granularity granularity))))))] (testing "query before first insert return 404" - (let [from (time/minus now (time/duration-unit 1 :days)) - to now] + (let [from (time/minus now (time/duration-unit 1 :days)) + to now] (-> (metrics-request {:queries [query1] :from from :to to @@ -502,10 +527,10 @@ 2 15]) "\n") (csv-request query1 "1-days")))) #_(testing "Export raw data" - (is (= (str "timestamp,test-dimension1,test-metric1\n" - (str/join "," [(time/to-str midnight-yesterday) - 10 5.5]) "\n") - (csv-request query1 "raw")))) + (is (= (str "timestamp,test-dimension1,test-metric1\n" + (str/join "," [(time/to-str midnight-yesterday) + 10 5.5]) "\n") + (csv-request query1 "raw")))) (testing "Export with custom queries not allowed" ))))))) From 86755d864a9137cc519b9047db2c31871e91e34a Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 6 May 2024 09:22:50 +0200 Subject: [PATCH 24/28] Query raw data --- .../resources/timeseries/data_utils.clj | 53 ++++++++++++------- .../resources/timeseries_lifecycle_test.clj | 49 +++++++++++------ 2 files changed, 67 insertions(+), 35 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index 617fb03be..e57ee2991 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -62,7 +62,9 @@ (defn parse-params [{:keys [query from to granularity custom-es-aggregations] :as params} {:keys [accept] :as _request}] - (let [queries (if (coll? query) query [query]) + (let [queries (if (coll? query) + query + (if (some? query) [query] [])) raw (= "raw" granularity) predefined-aggregations (not (or raw custom-es-aggregations)) custom-es-aggregations (cond-> custom-es-aggregations @@ -85,8 +87,9 @@ params) (defn throw-mandatory-query-parameter - [{:keys [queries] :as params}] - (when-not (seq queries) (logu/log-and-throw-400 "query parameter is mandatory")) + [{:keys [raw queries] :as params}] + (when (and (not raw) (not (seq queries))) + (logu/log-and-throw-400 "query parameter is mandatory")) params) (defn throw-mandatory-from-to-parameters @@ -420,7 +423,7 @@ (defn csv-dimension-keys-fn [{:keys [dimensions]} _query-spec] - (fn [{:keys [raw predefined-aggregations queries query-specs mode]}] + (fn [{:keys [raw predefined-aggregations queries query-specs]}] (cond raw [] @@ -435,28 +438,28 @@ [{:keys [dimensions]} _query-spec] (fn [{:keys [predefined-aggregations raw]}] (cond - raw (concat [:timestamp] (map :field-name dimensions)) + raw (concat [:timestamp] (map (comp keyword :field-name) dimensions)) predefined-aggregations [:timestamp :doc-count]))) (defn csv-query-keys-fn - [{:keys [query-name]}] - (fn [{:keys [predefined-aggregations raw queries query-specs resps]}] - (let [{:keys [aggregations response-aggs]} - (get query-specs (first queries))] - (cond - raw - (sort (keys (-> resps ffirst :ts-data first (get query-name)))) - - predefined-aggregations + [{:keys [metrics]} _query-spec] + (fn [{:keys [predefined-aggregations raw queries query-specs]}] + (cond + raw + (sort (map :field-name metrics)) + + predefined-aggregations + (let [{:keys [aggregations response-aggs]} + (get query-specs (first queries))] (or response-aggs (keys aggregations)))))) (defn csv-data-fn - [{:keys [query-name]}] + [_query-spec] (fn [{:keys [predefined-aggregations raw]} {:keys [aggregations] :as data-point} metric-key] (cond raw - (get-in data-point [query-name metric-key]) + (get data-point (keyword metric-key)) predefined-aggregations (get-in aggregations [(keyword metric-key) :value])))) @@ -465,14 +468,14 @@ [timeseries ts-query] (csv-export-fn (csv-dimension-keys-fn timeseries ts-query) (csv-meta-keys-fn timeseries ts-query) - (csv-query-keys-fn ts-query) + (csv-query-keys-fn timeseries ts-query) (csv-data-fn ts-query))) (defmethod ts-query->query-spec "standard" [{:keys [timeseries]} {:keys [query] :as ts-query}] - {:query-fn generic-query-fn - :aggregations (some-> query :aggregations parse-aggregations) - :csv-export-fn (generic-csv-export-fn timeseries ts-query)}) + {:query-fn generic-query-fn + :aggregations (some-> query :aggregations parse-aggregations) + :csv-export-fn (generic-csv-export-fn timeseries ts-query)}) (defmethod ts-query->query-spec "custom-es-query" [_params {:keys [custom-es-query] :as _ts-query}] @@ -521,6 +524,15 @@ 404))) params) +(defn assoc-raw-query + "If granularity is raw and no query is specified, generate a new query returning all the metrics." + [{:keys [granularity query timeseries] :as params}] + (cond-> params + (and (= "raw" granularity) (nil? query)) + (assoc :query ["raw"] + :query-specs {"raw" {:query-fn generic-query-fn + :csv-export-fn (generic-csv-export-fn timeseries {:query-name "raw"})}}))) + (defn generic-ts-query-data [params request] (-> params @@ -529,6 +541,7 @@ (assoc-dimensions-filters) (throw-invalid-dimensions) (throw-timeseries-not-created-yet) + (assoc-raw-query) (query-data request))) (defn wrapped-query-data diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 675f4f694..03008f57a 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -4,8 +4,8 @@ [clojure.string :as str] [clojure.test :refer [deftest is testing use-fixtures]] [peridot.core :refer [content-type header request session]] - [sixsq.nuvla.db.es.binding :as es-binding] [ring.util.codec :as rc] + [sixsq.nuvla.db.es.binding :as es-binding] [sixsq.nuvla.db.impl :as db] [sixsq.nuvla.server.app.params :as p] [sixsq.nuvla.server.middleware.authn-info :refer [authn-info-header]] @@ -402,9 +402,9 @@ (request data-op-url :body (rc/form-encode (cond-> - {:query queries - :from (if from (time/to-str from) from-str) - :to (if to (time/to-str to) to-str)} + {:from (if from (time/to-str from) from-str) + :to (if to (time/to-str to) to-str)} + queries (assoc :query queries) dimensions-filters (assoc :dimension-filter dimensions-filters) granularity (assoc :granularity granularity))))))] @@ -477,7 +477,7 @@ (ltu/body->edn) (ltu/is-status 400) (ltu/is-key-value :message "invalid dimensions: wrong-dimension")))) - (testing "raw query" + (testing "raw data with query" (let [from (time/minus now (time/duration-unit 1 :days)) to now metric-data (-> (metrics-request {:queries [query1] @@ -491,6 +491,18 @@ :ts-data (set (map #(update-keys % keyword) datapoints))}] (-> (get metric-data (keyword query1)) (update-in [0 :ts-data] set)))))) + (testing "raw data without query" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:from from + :to to + :granularity "raw"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= {:raw [{:dimensions {(keyword dimension1) "all"} + :ts-data (set (map #(update-keys % keyword) datapoints))}]} + (update-in metric-data [:raw 0 :ts-data] set))))) #_(testing "custom es query" (let [from (time/minus now (time/duration-unit 1 :days)) to now @@ -510,11 +522,11 @@ (let [from (time/minus now (time/duration-unit 1 :days)) to now csv-request (fn [query granularity] - (-> (metrics-request {:accept-header "text/csv" - :queries [query] - :from from - :to to - :granularity granularity}) + (-> (metrics-request (cond-> {:accept-header "text/csv" + :from from + :to to + :granularity granularity} + query (assoc :queries [query]))) (ltu/is-status 200) (ltu/is-header "Content-Type" "text/csv") (ltu/is-header "Content-disposition" "attachment;filename=export.csv") @@ -526,11 +538,18 @@ (str/join "," ["all" (time/to-str midnight-today) 2 15]) "\n") (csv-request query1 "1-days")))) - #_(testing "Export raw data" - (is (= (str "timestamp,test-dimension1,test-metric1\n" - (str/join "," [(time/to-str midnight-yesterday) - 10 5.5]) "\n") - (csv-request query1 "raw")))) + (testing "Export raw data to csv" + (is (= (into #{["timestamp" "test-dimension1" "test-metric1" "test-metric2"]} + (map (fn [{:keys [timestamp] :as datapoint}] + [timestamp + (get datapoint dimension1) + (str (int (get datapoint metric1))) + (str (int (get datapoint metric2)))]) + datapoints)) + (-> (csv-request nil "raw") + (str/split #"\n") + (->> (mapv #(str/split % #","))) + set)))) (testing "Export with custom queries not allowed" ))))))) From 058448195033fed7fa01e8c775e540c575477fba Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Mon, 6 May 2024 10:18:31 +0200 Subject: [PATCH 25/28] Fix error code in case of conflict --- code/src/sixsq/nuvla/db/es/binding.clj | 17 +++++++++++------ .../ts_nuvlaedge_telemetry_lifecycle_test.clj | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index e8aee6a12..76583d1ea 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -280,12 +280,17 @@ success? (not (errors? response))] (if success? body-response - (let [items (:items body-response) - msg (str (if (seq items) - {:errors-count (count items) - :first-error (first items)} - body-response))] - (es-logu/log-and-throw-unexpected-es-ex msg (ex-info msg {})))))) + (let [items (:items body-response) + status-codes (map (comp :status second first) items) + msg (str (if (seq items) + {:errors-count (count items) + :first-error (first items)} + body-response))] + (cond + (some #{409} status-codes) + (es-logu/throw-conflict-ex "") + :else + (es-logu/log-and-throw-unexpected-es-ex msg (ex-info msg {}))))))) (defn bulk-insert-timeseries-datapoints [client index data _options] diff --git a/code/test/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry_lifecycle_test.clj index ce3c1c703..a16f37936 100644 --- a/code/test/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/ts_nuvlaedge_telemetry_lifecycle_test.clj @@ -106,7 +106,7 @@ :request-method :patch :body (json/write-str conflicting-entries)) (ltu/body->edn) - (ltu/is-status 400)))))) + (ltu/is-status 409)))))) (deftest query-ram (let [session-anon (-> (ltu/ring-app) From 705132ecddec91813092b887443e181d1b6f8abb Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Wed, 8 May 2024 16:59:31 +0200 Subject: [PATCH 26/28] Query with custom aggregations --- .../server/resources/nuvlabox/data_utils.clj | 122 +++++++++--------- .../server/resources/spec/timeseries.cljc | 13 +- .../resources/timeseries/data_utils.clj | 111 ++++++++-------- .../server/resources/timeseries/utils.clj | 15 ++- .../resources/timeseries_lifecycle_test.clj | 77 ++++++++--- 5 files changed, 197 insertions(+), 141 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj index a9d56dad4..94d9ae843 100644 --- a/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/nuvlabox/data_utils.clj @@ -1,5 +1,6 @@ (ns sixsq.nuvla.server.resources.nuvlabox.data-utils (:require + [clojure.data.json :as json] [clojure.set :as set] [clojure.string :as str] [clojure.tools.logging :as log] @@ -443,13 +444,15 @@ (let [[_ n unit] (re-matches #"(.*)-(.*)" (name granularity))] (try (time/duration (Integer/parseInt n) (keyword unit)) - (catch Exception _ + (catch Exception e + (log/error e) (logu/log-and-throw-400 (str "unrecognized value for granularity " granularity)))))) (defn precompute-query-params - [{:keys [predefined-aggregations granularity] :as query-opts}] + [{:keys [raw query-type granularity] :as query-opts}] (cond-> query-opts - predefined-aggregations (assoc :granularity-duration (granularity->duration granularity)))) + (and (not raw) (= ts-data-utils/query-type-standard query-type)) + (assoc :granularity-duration (granularity->duration granularity)))) (defn available-before? [{:keys [first-availability] :as _nuvlabox} timestamp] @@ -566,8 +569,8 @@ resp)) (defn compute-nuvlabox-availability - [[{:keys [predefined-aggregations granularity-duration nuvlaboxes] :as query-opts} resp]] - (if predefined-aggregations + [[{:keys [raw query-type granularity-duration nuvlaboxes] :as query-opts} resp]] + (if (and (not raw) (= ts-data-utils/query-type-standard query-type)) (let [nuvlabox (first nuvlaboxes) now (time/now) hits (->> (get-in resp [0 :hits]) @@ -587,12 +590,9 @@ (defn csv-dimension-keys-fn [] - (fn [{:keys [raw predefined-aggregations queries query-specs mode]}] - (cond - raw + (fn [{:keys [raw queries query-specs mode]}] + (if raw [] - - predefined-aggregations (let [{group-by-field :group-by} (get query-specs (first queries)) dimension-keys (case mode :single-edge-query @@ -600,35 +600,30 @@ :multi-edge-query [:nuvlaedge-count])] (cond-> dimension-keys - (and predefined-aggregations group-by-field) (conj group-by-field)))))) + group-by-field (conj group-by-field)))))) (defn csv-meta-keys-fn [] - (fn [{:keys [mode predefined-aggregations raw]}] - (cond - raw (case mode - :single-edge-query - [:timestamp] - :multi-edge-query - [:timestamp :nuvlaedge-id]) - predefined-aggregations [:timestamp :doc-count]))) + (fn [{:keys [mode raw]}] + (if raw + (case mode + :single-edge-query + [:timestamp] + :multi-edge-query + [:timestamp :nuvlaedge-id]) + [:timestamp :doc-count]))) (defn availability-csv-metric-keys-fn [] - (fn [{:keys [predefined-aggregations raw queries query-specs]}] + (fn [{:keys [raw queries query-specs]}] (let [{:keys [response-aggs]} (get query-specs (first queries))] - (cond - raw [:online] - predefined-aggregations response-aggs)))) + (if raw [:online] response-aggs)))) (defn availability-csv-data-fn [] - (fn [{:keys [predefined-aggregations raw]} {:keys [aggregations] :as data-point} metric-key] - (cond - raw + (fn [{:keys [raw]} {:keys [aggregations] :as data-point} metric-key] + (if raw (get data-point metric-key) - - predefined-aggregations (get-in aggregations [metric-key :value])))) (defn availability-csv-export-fn @@ -640,25 +635,19 @@ (defn telemetry-csv-metric-keys-fn [metric] - (fn [{:keys [predefined-aggregations raw queries query-specs resps]}] + (fn [{:keys [raw queries query-specs resps]}] (let [{:keys [aggregations response-aggs]} (get query-specs (first queries))] - (cond - raw + (if raw (sort (keys (-> resps ffirst :ts-data first (get metric)))) - - predefined-aggregations (or response-aggs (keys aggregations)))))) (defn telemetry-csv-data-fn [metric] - (fn [{:keys [predefined-aggregations raw]} + (fn [{:keys [raw]} {:keys [aggregations] :as data-point} metric-key] - (cond - raw + (if raw (get-in data-point [metric metric-key]) - - predefined-aggregations (get-in aggregations [metric-key :value])))) (defn telemetry-csv-export-fn @@ -761,8 +750,8 @@ nb-resps))) (defn compute-nuvlaboxes-availabilities - [[{:keys [predefined-aggregations] :as query-opts} resp]] - (if predefined-aggregations + [[{:keys [raw query-type] :as query-opts} resp]] + (if (and (not raw) (= ts-data-utils/query-type-standard query-type)) (let [now (time/now) hits (->> (get-in resp [0 :hits]) (map #(update % :timestamp time/parse-date)) @@ -775,11 +764,11 @@ [query-opts resp])) (defn compute-global-availability - [[{:keys [predefined-aggregations] :as query-opts} resp]] + [[{:keys [raw query-type] :as query-opts} resp]] [query-opts (cond-> resp - predefined-aggregations + (and (not raw) (= ts-data-utils/query-type-standard query-type)) (update-resp-ts-data-point-aggs (fn [_ts-data-point {:keys [by-edge] :as aggs}] (let [avgs-count (count (:buckets by-edge)) @@ -795,11 +784,11 @@ nil)})))))]) (defn add-edges-count - [[{:keys [predefined-aggregations] :as query-opts} resp]] + [[{:keys [raw query-type] :as query-opts} resp]] [query-opts (cond-> resp - predefined-aggregations + (and (not raw) (= ts-data-utils/query-type-standard query-type)) (update-resp-ts-data-point-aggs (fn [_ts-data-point {:keys [by-edge] :as aggs}] (assoc aggs :edges-count {:value (count (:buckets by-edge))}))))]) @@ -813,8 +802,8 @@ (partial map (partial f ts-data-point)))))) (defn add-edge-names-fn - [[{:keys [predefined-aggregations nuvlaboxes] :as query-opts} resp]] - (if predefined-aggregations + [[{:keys [raw query-type nuvlaboxes] :as query-opts} resp]] + (if (and (not raw) (= ts-data-utils/query-type-standard query-type)) (let [edge-names-by-id (->> nuvlaboxes (map (fn [{:keys [id name]}] [id name])) @@ -827,8 +816,8 @@ [query-opts resp])) (defn add-missing-edges-fn - [[{:keys [predefined-aggregations granularity-duration nuvlaboxes] :as query-opts} resp]] - (if predefined-aggregations + [[{:keys [raw query-type granularity-duration nuvlaboxes] :as query-opts} resp]] + (if (and (not raw) (= ts-data-utils/query-type-standard query-type)) (letfn [(update-buckets [ts-data-point buckets] (let [bucket-edge-ids (set (map :key buckets)) @@ -976,8 +965,8 @@ (used-memory))))))) (defn query-and-process-availabilities - [{:keys [predefined-aggregations nuvlaboxes] :as options}] - (if predefined-aggregations + [{:keys [raw query-type nuvlaboxes] :as options}] + (if (and (not raw) (= ts-data-utils/query-type-standard query-type)) (let [ret (query-and-process-availabilities* options)] [{:dimensions {:nuvlaedge-count (count nuvlaboxes)} :ts-data (mapv @@ -1108,12 +1097,29 @@ uuid (assoc :id (u/resource-id "nuvlabox" uuid)))) (defn assoc-query-specs - [{:keys [mode] :as params}] - (assoc params - :query-specs - (case mode - :single-edge-query (single-edge-queries) - :multi-edge-query (multi-edge-queries)))) + [{:keys [mode custom-es-aggregations] :as params} _request] + (let [custom-es-aggregations (cond-> custom-es-aggregations + (string? custom-es-aggregations) + json/read-str) + query-specs (cond-> (case mode + :single-edge-query (single-edge-queries) + :multi-edge-query (multi-edge-queries)) + + (not custom-es-aggregations) + (update-vals #(assoc % :query-type ts-data-utils/query-type-standard)) + + custom-es-aggregations + (update-vals #(-> % + (assoc :query-type ts-data-utils/query-type-custom-es-query) + (assoc :aggregations custom-es-aggregations))))] + (assoc params :query-specs query-specs))) + +(defn throw-custom-es-aggregations-checks + [{:keys [granularity custom-es-aggregations] :as params}] + (when custom-es-aggregations + (when granularity + (logu/log-and-throw-400 "when custom-es-aggregations is specified, granularity parameter must be omitted"))) + params) (defn assoc-query [{:keys [dataset] :as params}] @@ -1123,9 +1129,10 @@ (defn query-data [params request] (-> params + (throw-custom-es-aggregations-checks) (assoc-nuvlabox-id) (assoc-query) - (assoc-query-specs) + (assoc-query-specs request) (ts-data-utils/query-data request))) (defn gated-query-data @@ -1174,4 +1181,3 @@ [params request] (let [query-data (ts-data-utils/wrap-query-data-accept (partial gated-query-data params))] (query-data request))) - diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index d69e34b61..c1ba7758b 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -17,10 +17,15 @@ :json-schema/type "string" :json-schema/description "Timeseries field name")) +(s/def ::description + (-> (st/spec string?) + (assoc :json-schema/description "human-readable description"))) + (s/def ::dimension (assoc (st/spec (su/only-keys :req-un [::field-name - ::field-type])) + ::field-type] + :opt-un [::description])) :json-schema/type "map" :json-schema/description "Timeseries dimension")) @@ -45,7 +50,8 @@ :req-un [::field-name ::field-type ::metric-type] - :opt-un [::optional])) + :opt-un [::description + ::optional])) :json-schema/type "map" :json-schema/description "Timeseries metric")) @@ -102,7 +108,8 @@ (assoc (st/spec (su/only-keys :req-un [::query-name ::query-type] - :opt-un [::query + :opt-un [::description + ::query ::custom-es-query])) :json-schema/type "map" :json-schema/description "Timeseries query definition")) diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj index e57ee2991..fb1851ed1 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/data_utils.clj @@ -19,6 +19,9 @@ (def max-data-points 200) +(def query-type-standard "standard") +(def query-type-custom-es-query "custom-es-query") + (defn update-resp-ts-data [resp f] (-> resp @@ -49,10 +52,10 @@ (logu/log-and-throw-400 (str "unrecognized value for granularity " granularity)))))) (defn keep-response-aggs-only - [{:keys [predefined-aggregations response-aggs] :as _query-opts} resp] + [{:keys [query-type response-aggs] :as _query-opts} resp] (cond-> resp - predefined-aggregations + (= query-type-standard query-type) (update-resp-ts-data-point-aggs (fn [_ts-data-point aggs] (if response-aggs @@ -65,20 +68,13 @@ (let [queries (if (coll? query) query (if (some? query) [query] [])) - raw (= "raw" granularity) - predefined-aggregations (not (or raw custom-es-aggregations)) - custom-es-aggregations (cond-> custom-es-aggregations - (string? custom-es-aggregations) - json/read-str)] + raw (= "raw" granularity)] (-> params (assoc :mime-type (:mime accept)) (assoc :queries queries) (assoc :from (time/parse-date from)) (assoc :to (time/parse-date to)) - (cond-> - raw (assoc :raw true) - predefined-aggregations (assoc :predefined-aggregations true) - custom-es-aggregations (assoc :custom-es-aggregations custom-es-aggregations))))) + (cond-> raw (assoc :raw true))))) (defn throw-response-format-not-supported [{:keys [mime-type] :as params}] @@ -107,21 +103,18 @@ params) (defn throw-mandatory-granularity-parameter - [{:keys [raw granularity custom-es-aggregations] :as params}] - (when (and (not raw) (not custom-es-aggregations) (empty? granularity)) - (logu/log-and-throw-400 "granularity parameter is mandatory")) - params) - -(defn throw-custom-es-aggregations-checks - [{:keys [custom-es-aggregations granularity] :as params}] - (when custom-es-aggregations - (when granularity - (logu/log-and-throw-400 "when custom-es-aggregations is specified, granularity parameter must be omitted"))) + [{:keys [raw granularity queries query-specs] :as params}] + (when (and (not raw) + (->> (select-keys query-specs queries) + vals + (some #(= query-type-standard (:query-type %)))) + (empty? granularity)) + (logu/log-and-throw-400 "granularity parameter is mandatory with standard queries")) params) (defn throw-too-many-data-points - [{:keys [from to granularity predefined-aggregations] :as params}] - (when predefined-aggregations + [{:keys [from to granularity raw] :as params}] + (when (and granularity (not raw)) (let [max-n-buckets max-data-points n-buckets (.dividedBy (time/duration from to) (granularity->duration granularity))] @@ -151,9 +144,9 @@ (cond-> params filter (assoc :cimi-filter filter))) (defn assoc-ts-interval - [{:keys [predefined-aggregations granularity] :as params}] + [{:keys [raw granularity] :as params}] (cond-> params - predefined-aggregations + (and granularity (not raw)) (assoc :ts-interval (granularity->ts-interval granularity)))) (defn throw-unknown-queries @@ -171,13 +164,13 @@ params) (defn run-query - [params query-specs query-key] + [{:keys [raw] :as params} query-specs query-key] (let [{:keys [pre-process-fn query-fn post-process-fn] :as query-spec} (get query-specs query-key) - {:keys [predefined-aggregations] :as query-opts} (merge params query-spec) + {:keys [query-type] :as query-opts} (merge params query-spec) query-opts (if pre-process-fn (doall (pre-process-fn query-opts)) query-opts)] (cond->> (doall (query-fn query-opts)) post-process-fn ((fn [resp] (doall (second (post-process-fn [query-opts resp]))))) - predefined-aggregations (keep-response-aggs-only query-opts)))) + (and (not raw) (= query-type-standard query-type)) (keep-response-aggs-only query-opts)))) (defn run-queries [{:keys [queries query-specs] :as params}] @@ -214,7 +207,6 @@ (throw-from-not-before-to) (throw-mandatory-granularity-parameter) (throw-too-many-data-points) - (throw-custom-es-aggregations-checks) (assoc-request request) (assoc-cimi-filter) (assoc-ts-interval) @@ -254,7 +246,7 @@ {field-name {:count (count v)}}))) (into {}))) -(defn ->predefined-aggregations-resp +(defn ->standard-query-resp [{:keys [aggregations ->resp-dimensions-fn] group-by-field :group-by :as params} resp] (let [ts-data (fn [tsds-stats] (map @@ -277,7 +269,7 @@ :ts-data (ts-data (get-in resp [0 :aggregations :tsds-stats]))} (seq hits) (assoc :hits hits))]))) -(defn ->custom-es-aggregations-resp +(defn ->custom-es-query-resp [{:keys [->resp-dimensions-fn] :as params} resp] (let [ts-data (fn [tsds-stats] (map @@ -297,24 +289,25 @@ :ts-data (sort-by :timestamp hits)}])) (defn ->ts-query-resp - [{:keys [predefined-aggregations custom-es-aggregations raw] :as params} resp] + [{:keys [query-type raw] :as params} resp] (cond - predefined-aggregations - (->predefined-aggregations-resp params resp) - raw (->raw-resp params resp) - custom-es-aggregations - (->custom-es-aggregations-resp params resp))) + (= query-type-standard query-type) + (->standard-query-resp params resp) + + (= query-type-custom-es-query query-type) + (->custom-es-query-resp params resp))) (defn build-aggregations-clause - [{:keys [predefined-aggregations raw custom-es-aggregations from to ts-interval aggregations] group-by-field :group-by}] + [{:keys [from to ts-interval raw query-type aggregations] + group-by-field :group-by}] (cond raw {} ;; send an empty :tsds-aggregation to avoid acl checks. TODO: find a cleaner way - predefined-aggregations + (= query-type-standard query-type) (let [tsds-aggregations {:tsds-stats {:date_histogram {:field "@timestamp" @@ -330,8 +323,8 @@ :aggregations tsds-aggregations}}} {:aggregations tsds-aggregations})) - custom-es-aggregations - {:aggregations custom-es-aggregations})) + (= query-type-custom-es-query query-type) + {:aggregations aggregations})) (defn dimension-filter->cimi-filter [[dimension values]] @@ -422,46 +415,46 @@ (first resps)))) (defn csv-dimension-keys-fn - [{:keys [dimensions]} _query-spec] - (fn [{:keys [raw predefined-aggregations queries query-specs]}] + [{:keys [dimensions]} {:keys [query-type] :as _query-spec}] + (fn [{:keys [raw queries query-specs]}] (cond raw [] - predefined-aggregations + (= query-type-standard query-type) (let [{group-by-field :group-by} (get query-specs (first queries)) dimension-keys (map :field-name dimensions)] (cond-> dimension-keys - (and predefined-aggregations group-by-field) (conj group-by-field)))))) + group-by-field (conj group-by-field)))))) (defn csv-meta-keys-fn - [{:keys [dimensions]} _query-spec] - (fn [{:keys [predefined-aggregations raw]}] + [{:keys [dimensions]} {:keys [query-type] :as _query-spec}] + (fn [{:keys [raw]}] (cond raw (concat [:timestamp] (map (comp keyword :field-name) dimensions)) - predefined-aggregations [:timestamp :doc-count]))) + (= query-type-standard query-type) [:timestamp :doc-count]))) (defn csv-query-keys-fn - [{:keys [metrics]} _query-spec] - (fn [{:keys [predefined-aggregations raw queries query-specs]}] + [{:keys [metrics]} {:keys [query-type] :as _query-spec}] + (fn [{:keys [raw queries query-specs]}] (cond raw (sort (map :field-name metrics)) - predefined-aggregations + (= query-type-standard query-type) (let [{:keys [aggregations response-aggs]} (get query-specs (first queries))] (or response-aggs (keys aggregations)))))) (defn csv-data-fn - [_query-spec] - (fn [{:keys [predefined-aggregations raw]} + [{:keys [query-type] :as _query-spec}] + (fn [{:keys [raw]} {:keys [aggregations] :as data-point} metric-key] (cond raw (get data-point (keyword metric-key)) - predefined-aggregations + (= query-type-standard query-type) (get-in aggregations [(keyword metric-key) :value])))) (defn generic-csv-export-fn @@ -471,15 +464,17 @@ (csv-query-keys-fn timeseries ts-query) (csv-data-fn ts-query))) -(defmethod ts-query->query-spec "standard" +(defmethod ts-query->query-spec query-type-standard [{:keys [timeseries]} {:keys [query] :as ts-query}] - {:query-fn generic-query-fn + {:query-type query-type-standard + :query-fn generic-query-fn :aggregations (some-> query :aggregations parse-aggregations) :csv-export-fn (generic-csv-export-fn timeseries ts-query)}) -(defmethod ts-query->query-spec "custom-es-query" +(defmethod ts-query->query-spec query-type-custom-es-query [_params {:keys [custom-es-query] :as _ts-query}] - {:query-fn generic-query-fn + {:query-type query-type-custom-es-query + :query-fn generic-query-fn :aggregations (some-> custom-es-query :aggregations)}) (defn assoc-query-specs diff --git a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj index 01b96b3c3..d34729d7e 100644 --- a/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj +++ b/code/src/sixsq/nuvla/server/resources/timeseries/utils.clj @@ -51,8 +51,10 @@ {current-dimensions :dimensions :as _current}] (when current-dimensions (when-not (and (>= (count new-dimensions) (count current-dimensions)) - (= current-dimensions - (subvec new-dimensions 0 (count current-dimensions)))) + (= (map #(dissoc % :description) + current-dimensions) + (map #(dissoc % :description) + (subvec new-dimensions 0 (count current-dimensions))))) (throw (r/ex-response "dimensions can only be appended" 400)))) request) @@ -60,10 +62,11 @@ [{{new-metrics :metrics} :body :as request} {current-metrics :metrics :as _current}] (when-not (every? (fn [{:keys [field-name] :as current-metric}] - (= current-metric - (->> new-metrics - (filter #(= field-name (:field-name %))) - first))) + (= (dissoc current-metric :description) + (dissoc (->> new-metrics + (filter #(= field-name (:field-name %))) + first) + :description))) current-metrics) (throw (r/ex-response "metrics can only be added" 400))) request) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 03008f57a..1b9751c70 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -29,21 +29,26 @@ (def aggregation1 "test-metric1-avg") (def valid-entry {:dimensions [{:field-name dimension1 - :field-type "keyword"}] + :field-type "keyword" + :description "description of dimension 1"}] :metrics [{:field-name metric1 :field-type "double" - :metric-type "gauge"} + :metric-type "gauge" + :description "description of metric 1"} {:field-name metric2 :field-type "long" + :description "description of metric 2" :metric-type "counter" :optional true}] :queries [{:query-name query1 :query-type "standard" + :description "description of query 1" :query {:aggregations [{:aggregation-name aggregation1 :aggregation-type "avg" :field-name metric1}]}} {:query-name query2 :query-type "custom-es-query" + :description "description of query 2" :custom-es-query {:aggregations {:agg1 {:date_histogram {:field "@timestamp" @@ -503,20 +508,60 @@ (is (= {:raw [{:dimensions {(keyword dimension1) "all"} :ts-data (set (map #(update-keys % keyword) datapoints))}]} (update-in metric-data [:raw 0 :ts-data] set))))) - #_(testing "custom es query" - (let [from (time/minus now (time/duration-unit 1 :days)) - to now - metric-data (-> (metrics-request {:queries [query2] - :from from - :to to - :granularity "1-days"}) - (ltu/is-status 200) - (ltu/body->edn) - (ltu/body))] - (is (= [{:dimensions {(keyword dimension1) "all"} - :ts-data (set (map #(update-keys % keyword) datapoints))}] - (-> (get metric-data (keyword query1)) - (update-in [0 :ts-data] set)))))) + + (testing "raw data with dimension filter" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now + metric-data (-> (metrics-request {:dimensions-filters [(str dimension1 "=" d1-val1)] + :from from + :to to + :granularity "raw"}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= {:raw [{:dimensions {(keyword dimension1) d1-val1} + :ts-data (->> datapoints + (filter #(= d1-val1 (get % dimension1))) + (map #(update-keys % keyword)) + set)}]} + (update-in metric-data [:raw 0 :ts-data] set))))) + + (testing "custom es query" + (let [from (time/minus now (time/duration-unit 1 :days)) + to now] + (testing "query with no dimensions filter" + (let [metric-data (-> (metrics-request {:queries [query2] + :from from + :to to}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) "all"} + :agg1 [{:timestamp (time/to-str midnight-today) + :doc-count 2 + :aggregations {:custom-agg {:avg 15.0 + :count 2 + :max 20.0 + :min 10.0 + :sum 30.0}}}]}] + (get metric-data (keyword query2)))))) + (testing "query with dimensions filter" + (let [metric-data (-> (metrics-request {:dimensions-filters [(str dimension1 "=" d1-val1)] + :queries [query2] + :from from + :to to}) + (ltu/is-status 200) + (ltu/body->edn) + (ltu/body))] + (is (= [{:dimensions {(keyword dimension1) d1-val1} + :agg1 [{:timestamp (time/to-str midnight-today) + :doc-count 1 + :aggregations {:custom-agg {:avg 10.0 + :count 1 + :max 10.0 + :min 10.0 + :sum 10.0}}}]}] + (get metric-data (keyword query2)))))))) (testing "csv export" (let [from (time/minus now (time/duration-unit 1 :days)) From 17ee9536b044d413c3974c93d0560a9553eb1eda Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Thu, 9 May 2024 06:46:06 +0200 Subject: [PATCH 27/28] Add test to check returned status code on invalid custom queries --- code/src/sixsq/nuvla/db/es/binding.clj | 5 +-- .../resources/timeseries_lifecycle_test.clj | 31 +++++++++++++------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/code/src/sixsq/nuvla/db/es/binding.clj b/code/src/sixsq/nuvla/db/es/binding.clj index 76583d1ea..5e0198499 100644 --- a/code/src/sixsq/nuvla/db/es/binding.clj +++ b/code/src/sixsq/nuvla/db/es/binding.clj @@ -234,10 +234,7 @@ (let [msg (str "error when querying: " (:body response))] (throw (r/ex-response msg 500))))) (catch Exception e - (let [{:keys [body] :as _response} (ex-data e) - error (:error body) - msg (str "unexpected exception querying: " (or error e))] - (throw (r/ex-response msg 500)))))) + (es-logu/log-and-throw-unexpected-es-ex e)))) (defn query-data-native [client index query] diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 1b9751c70..8649479f6 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -26,10 +26,11 @@ (def query1 "test-query1") (def query2 "test-query2") +(def query3 "test-query3-invalid") (def aggregation1 "test-metric1-avg") -(def valid-entry {:dimensions [{:field-name dimension1 - :field-type "keyword" +(def valid-entry {:dimensions [{:field-name dimension1 + :field-type "keyword" :description "description of dimension 1"}] :metrics [{:field-name metric1 :field-type "double" @@ -40,21 +41,26 @@ :description "description of metric 2" :metric-type "counter" :optional true}] - :queries [{:query-name query1 - :query-type "standard" + :queries [{:query-name query1 + :query-type "standard" :description "description of query 1" - :query {:aggregations [{:aggregation-name aggregation1 - :aggregation-type "avg" - :field-name metric1}]}} + :query {:aggregations [{:aggregation-name aggregation1 + :aggregation-type "avg" + :field-name metric1}]}} {:query-name query2 :query-type "custom-es-query" - :description "description of query 2" + :description "description of query 2" :custom-es-query {:aggregations {:agg1 {:date_histogram {:field "@timestamp" :fixed_interval "1d" :min_doc_count 0} - :aggregations {:custom-agg {:stats {:field metric1}}}}}}}]}) + :aggregations {:custom-agg {:stats {:field metric1}}}}}}} + {:query-name query3 + :query-type "custom-es-query" + :description "invalid query" + :custom-es-query {:aggregations + {:agg1 {:invalid "invalid"}}}}]}) (defn create-timeseries [session entry] @@ -561,7 +567,12 @@ :max 10.0 :min 10.0 :sum 10.0}}}]}] - (get metric-data (keyword query2)))))))) + (get metric-data (keyword query2)))))) + (testing "invalid ES query" + (-> (metrics-request {:queries [query3] + :from from + :to to}) + (ltu/is-status 500))))) (testing "csv export" (let [from (time/minus now (time/duration-unit 1 :days)) From 79cf9ecfd7bbc656900d3db8b111188a094165fa Mon Sep 17 00:00:00 2001 From: Alessandro Bellucci Date: Fri, 10 May 2024 13:03:29 +0200 Subject: [PATCH 28/28] sum and value_count aggregations. Test custom query csv export. --- .../server/resources/spec/timeseries.cljc | 2 +- .../resources/timeseries_lifecycle_test.clj | 42 +++++++++++++++---- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc index c1ba7758b..9ede7dcfc 100644 --- a/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc +++ b/code/src/sixsq/nuvla/server/resources/spec/timeseries.cljc @@ -74,7 +74,7 @@ (assoc (st/spec ::core/nonblank-string) :json-schema/description "Timeseries query aggregation name")) -(def aggregation-types #{"avg" "min" "max"}) +(def aggregation-types #{"avg" "min" "max" "sum" "value_count"}) (s/def ::aggregation-type (assoc (st/spec aggregation-types) diff --git a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj index 8649479f6..bd50f7424 100644 --- a/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj +++ b/code/test/sixsq/nuvla/server/resources/timeseries_lifecycle_test.clj @@ -28,6 +28,8 @@ (def query2 "test-query2") (def query3 "test-query3-invalid") (def aggregation1 "test-metric1-avg") +(def aggregation2 "test-metric1-value-count") +(def aggregation3 "test-metric1-sum") (def valid-entry {:dimensions [{:field-name dimension1 :field-type "keyword" @@ -46,6 +48,12 @@ :description "description of query 1" :query {:aggregations [{:aggregation-name aggregation1 :aggregation-type "avg" + :field-name metric1} + {:aggregation-name aggregation2 + :aggregation-type "value_count" + :field-name metric1} + {:aggregation-name aggregation3 + :aggregation-type "sum" :field-name metric1}]}} {:query-name query2 :query-type "custom-es-query" @@ -452,10 +460,14 @@ (is (= [{:dimensions {(keyword dimension1) "all"} :ts-data [{:timestamp (time/to-str midnight-yesterday) :doc-count 0 - :aggregations {(keyword aggregation1) {:value nil}}} + :aggregations {(keyword aggregation1) {:value nil} + (keyword aggregation2) {:value 0} + (keyword aggregation3) {:value 0.0}}} {:timestamp (time/to-str midnight-today) :doc-count 2 - :aggregations {(keyword aggregation1) {:value 15.0}}}]}] + :aggregations {(keyword aggregation1) {:value 15.0} + (keyword aggregation2) {:value 2} + (keyword aggregation3) {:value 30.0}}}]}] (get metric-data (keyword query1)))))) (testing "basic query with dimension filter" (let [from (time/minus now (time/duration-unit 1 :days)) @@ -471,10 +483,14 @@ (is (= [{:dimensions {(keyword dimension1) d1-val1} :ts-data [{:timestamp (time/to-str midnight-yesterday) :doc-count 0 - :aggregations {(keyword aggregation1) {:value nil}}} + :aggregations {(keyword aggregation1) {:value nil} + (keyword aggregation2) {:value 0} + (keyword aggregation3) {:value 0.0}}} {:timestamp (time/to-str midnight-today) :doc-count 1 - :aggregations {(keyword aggregation1) {:value 10.0}}}]}] + :aggregations {(keyword aggregation1) {:value 10.0} + (keyword aggregation2) {:value 1} + (keyword aggregation3) {:value 10.0}}}]}] (get metric-data (keyword query1)))))) (testing "basic query with wrong dimension filter" (let [from (time/minus now (time/duration-unit 1 :days)) @@ -572,7 +588,9 @@ (-> (metrics-request {:queries [query3] :from from :to to}) - (ltu/is-status 500))))) + (ltu/body->edn) + (ltu/is-status 500) + (ltu/is-key-value :message "unexpected error"))))) (testing "csv export" (let [from (time/minus now (time/duration-unit 1 :days)) @@ -588,11 +606,11 @@ (ltu/is-header "Content-disposition" "attachment;filename=export.csv") (ltu/body)))] (testing "Basic query" - (is (= (str "test-dimension1,timestamp,doc-count,test-metric1-avg\n" + (is (= (str "test-dimension1,timestamp,doc-count,test-metric1-avg,test-metric1-value-count,test-metric1-sum\n" (str/join "," ["all" (time/to-str midnight-yesterday) - 0 nil]) "\n" + 0 nil 0 0]) "\n" (str/join "," ["all" (time/to-str midnight-today) - 2 15]) "\n") + 2 15 2 30]) "\n") (csv-request query1 "1-days")))) (testing "Export raw data to csv" (is (= (into #{["timestamp" "test-dimension1" "test-metric1" "test-metric2"]} @@ -607,7 +625,13 @@ (->> (mapv #(str/split % #","))) set)))) (testing "Export with custom queries not allowed" - ))))))) + (-> (metrics-request {:accept-header "text/csv" + :from from + :to to + :queries [query3]}) + (ltu/is-status 400) + (ltu/body->edn) + (ltu/is-key-value :message (str "csv export not supported for query " query3)))))))))) (deftest bad-methods (let [resource-uri (str p/service-context (u/new-resource-id t/resource-type))]