Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions apps/limiter/src/lim_otel_log_filter.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
%%% @doc
%%% Logger filter для otel_logs handler: конвертирует otel_trace_id и otel_span_id
%%% из hex-формата (32/16 символов) в raw bytes (16/8 байт), как требует OTLP LogRecord.
%%% opentelemetry hex_span_ctx возвращает hex, collector ожидает bytes.
%%% @end
-module(lim_otel_log_filter).

-export([filter/2]).
-export([format_otp_report_utf8/1]).

-spec filter(logger:log_event(), term()) -> logger:filter_return().
filter(#{meta := Meta} = LogEvent, _FilterConfig) ->
case convert_otel_ids(Meta) of
Meta ->
LogEvent;
Meta1 ->
LogEvent#{meta => Meta1}
end.

%% Конвертируем hex -> raw bytes только если формат hex (32/16 символов).
%% OTLP LogRecord: trace_id=16 bytes, span_id=8 bytes.
convert_otel_ids(#{otel_trace_id := TraceIdHex, otel_span_id := SpanIdHex} = Meta) ->
case {hex_to_trace_id_bytes(TraceIdHex), hex_to_span_id_bytes(SpanIdHex)} of
{TraceIdBytes, SpanIdBytes} when TraceIdBytes =/= undefined, SpanIdBytes =/= undefined ->
Meta#{otel_trace_id => TraceIdBytes, otel_span_id => SpanIdBytes};
_ ->
%% Некорректный формат — убираем, чтобы otel_otlp_logs не отправил в OTLP
maps:without([otel_trace_id, otel_span_id, otel_trace_flags], Meta)
end;
convert_otel_ids(Meta) ->
Meta.

%% logger:format_otp_report/1 возвращает chardata (часто list()),
%% из-за чего downstream JSON может сериализовать body как массив байт.
%% Явно приводим к UTF-8 binary(), чтобы body в OTel/Loki был строкой.
-spec format_otp_report_utf8(logger:report()) -> {unicode:chardata(), list()}.
format_otp_report_utf8(Report) ->
Bin =
try logger:format_otp_report(Report) of
{Format, Args} ->
unicode:characters_to_binary(io_lib:format(Format, Args))
catch
_:_ ->
%% Не даём report_cb падать: fallback в печатное представление отчёта.
unicode:characters_to_binary(io_lib:format("~tp", [Report]))
end,
{"~ts", [Bin]}.

hex_to_trace_id_bytes(Hex) when is_binary(Hex), byte_size(Hex) =:= 32 ->
try
<<(binary_to_integer(Hex, 16)):128>>
catch
_:_ -> undefined
end;
hex_to_trace_id_bytes(_) ->
undefined.

hex_to_span_id_bytes(Hex) when is_binary(Hex), byte_size(Hex) =:= 16 ->
try
<<(binary_to_integer(Hex, 16)):64>>
catch
_:_ -> undefined
end;
hex_to_span_id_bytes(_) ->
undefined.
48 changes: 48 additions & 0 deletions apps/limiter/src/lim_otel_log_handler.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-module(lim_otel_log_handler).

-export([log/2]).
-export([adding_handler/1]).
-export([removing_handler/1]).
-export([changing_config/3]).
-export([filter_config/1]).

-spec log(logger:log_event(), map()) -> ok.
log(LogEvent, Config) ->
otel_log_handler:log(LogEvent, Config).

-spec adding_handler(map()) -> {ok, map()} | {error, term()}.
adding_handler(Config) ->
otel_log_handler:adding_handler(merge_module_config(Config)).

-spec removing_handler(map()) -> ok.
removing_handler(Config) ->
otel_log_handler:removing_handler(Config).

-spec changing_config(set | update, map(), map()) -> {ok, map()} | {error, term()}.
changing_config(SetOrUpdate, OldConfig, NewConfig) ->
otel_log_handler:changing_config(
SetOrUpdate,
merge_module_config(OldConfig),
merge_module_config(NewConfig)
).

-spec filter_config(map()) -> map().
filter_config(Config) ->
otel_log_handler:filter_config(Config).

%% Переносим ключи из вложенного config в корневой map для otel_log_handler.
%% Только ключи, которые ожидает otel_log_handler — чтобы случайно не перезаписать
%% верхнеуровневые настройки logger (level, filters, filter_default и т.д.).
-define(OTEL_LOG_HANDLER_KEYS, [
exporter,
report_cb,
max_queue_size,
scheduled_delay_ms,
exporting_timeout_ms
]).

merge_module_config(#{config := ModuleConfig} = Config) when is_map(ModuleConfig) ->
OtelConfig = maps:with(?OTEL_LOG_HANDLER_KEYS, ModuleConfig),
maps:merge(Config, OtelConfig);
merge_module_config(Config) ->
Config.
3 changes: 2 additions & 1 deletion apps/limiter/src/limiter.app.src
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
dmt_client,
opentelemetry_api,
opentelemetry_exporter,
opentelemetry
opentelemetry,
opentelemetry_experimental
]},
{mod, {limiter, []}},
{env, []}
Expand Down
70 changes: 68 additions & 2 deletions apps/limiter/src/limiter.erl
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,18 @@

-spec start(normal, any()) -> {ok, pid()} | {error, any()}.
start(_StartType, _StartArgs) ->
ok = setup_metrics(),
supervisor:start_link({local, ?MODULE}, ?MODULE, []).
case ensure_otel_log_handler() of
ok ->
ok = setup_metrics(),
supervisor:start_link({local, ?MODULE}, ?MODULE, []);
{error, Reason} ->
logger:error("Failed to add otel_logs handler: ~p", [Reason]),
{error, Reason}
end.

-spec stop(any()) -> ok.
stop(_State) ->
ok = flush_otel_logs(),
ok.

%%
Expand Down Expand Up @@ -99,3 +106,62 @@ get_prometheus_route() ->
setup_metrics() ->
ok = woody_ranch_prometheus_collector:setup(),
ok = woody_hackney_prometheus_collector:setup().

ensure_otel_log_handler() ->
case logger:get_handler_config(otel_logs) of
{ok, _} ->
ok;
_ ->
MaxQueue = application:get_env(limiter, otel_log_max_queue_size, 2048),
DelayMs = application:get_env(limiter, otel_log_scheduled_delay_ms, 1000),
TimeoutMs = application:get_env(limiter, otel_log_exporting_timeout_ms, 300000),
LogLevel = application:get_env(limiter, otel_log_level, info),
HandlerConfig = #{
report_cb => fun lim_otel_log_filter:format_otp_report_utf8/1,
exporter =>
{otel_exporter_logs_otlp, #{
protocol => http_protobuf,
ssl_options => []
}},
max_queue_size => MaxQueue,
scheduled_delay_ms => DelayMs,
exporting_timeout_ms => TimeoutMs
},
LoggerHandlerConfig = #{
level => LogLevel,
filter_default => log,
filters => [{lim_otel_trace_id_bytes, {fun lim_otel_log_filter:filter/2, undefined}}],
config => HandlerConfig
},
case logger:add_handler(otel_logs, lim_otel_log_handler, LoggerHandlerConfig) of
ok ->
ok;
{error, {already_exist, _}} ->
ok;
{error, Reason} ->
{error, {otel_log_handler_failed, Reason}}
end
end.

%% @doc Ждём отправки буферизованных логов перед остановкой.
%% otel_log_handler батчит логи и отправляет по таймеру (scheduled_delay_ms).
%% Явного API для flush у otel_log_handler нет, поэтому ждём один полный цикл
%% батчинга + запас на сетевую отправку (export overhead).
-define(FLUSH_EXPORT_OVERHEAD_MS, 700).
-define(FLUSH_MAX_WAIT_MS, 5000).

flush_otel_logs() ->
case logger:get_handler_config(otel_logs) of
{ok, HandlerCfg} ->
Config = maps:get(config, HandlerCfg, #{}),
DelayMs = maps:get(
scheduled_delay_ms,
Config,
maps:get(scheduled_delay_ms, HandlerCfg, 1000)
),
_ = logger:info("otel_log_handler_flush"),
timer:sleep(erlang:min(?FLUSH_MAX_WAIT_MS, DelayMs + ?FLUSH_EXPORT_OVERHEAD_MS)),
ok;
_ ->
ok
end.
88 changes: 73 additions & 15 deletions compose.tracing.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,91 @@
# UI: Grafana http://localhost:3000 (admin/admin)
services:
# OpenTelemetry Collector: single OTLP endpoint, fans out to Tempo + Loki
otel-collector:
image: otel/opentelemetry-collector-contrib:0.112.0
command: ["--config=/etc/otel/config.yaml"]
volumes:
- ./test/tracing/otel-collector-config.yaml:/etc/otel/config.yaml:ro
ports:
- "4317:4317" # OTLP gRPC
- "4318:4318" # OTLP HTTP
healthcheck:
test: ["CMD", "/otelcol-contrib", "--version"]
interval: 5s
timeout: 2s
retries: 20
start_period: 5s
depends_on:
tempo:
condition: service_healthy
loki:
condition: service_healthy

dmt:
environment: &otlp_enabled
OTEL_TRACES_EXPORTER: otlp
OTEL_LOGS_EXPORTER: otlp
OTEL_TRACES_SAMPLER: parentbased_always_off
OTEL_EXPORTER_OTLP_PROTOCOL: http_protobuf
OTEL_EXPORTER_OTLP_ENDPOINT: http://jaeger:4318
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4318
OTEL_EXPORTER_OTLP_LOGS_ENDPOINT: http://otel-collector:4318/v1/logs
OTEL_SERVICE_NAME: dmt

testrunner:
environment:
<<: *otlp_enabled
OTEL_SERVICE_NAME: limiter_testrunner
OTEL_SERVICE_NAME: limiter
OTEL_TRACES_SAMPLER: parentbased_always_on
depends_on:
jaeger:
dmt:
condition: service_healthy
liminator:
condition: service_healthy
otel-collector:
condition: service_healthy
grafana:
condition: service_started

jaeger:
image: jaegertracing/all-in-one:1.47
environment:
- COLLECTOR_OTLP_ENABLED=true
tempo:
image: grafana/tempo:2.6.1
command: ["-config.file=/etc/tempo.yaml"]
volumes:
- ./test/tracing/tempo.yaml:/etc/tempo.yaml:ro
ports:
- 3200:3200
healthcheck:
test: "/go/bin/all-in-one-linux status"
interval: 2s
timeout: 1s
test: ["CMD-SHELL", "wget -q -O- http://localhost:3200/ready || exit 1"]
interval: 5s
timeout: 2s
retries: 20
start_period: 5s

loki:
image: grafana/loki:3.1.1
command: ["-config.file=/etc/loki/config.yaml"]
volumes:
- ./test/tracing/loki.yaml:/etc/loki/config.yaml:ro
ports:
- 4317:4317 # OTLP gRPC receiver
- 4318:4318 # OTLP http receiver
- 5778:5778
- 14250:14250
- 16686:16686
- 3100:3100
healthcheck:
test: ["CMD-SHELL", "wget -q -O- http://localhost:3100/ready || exit 1"]
interval: 5s
timeout: 2s
retries: 20
start_period: 5s

grafana:
image: grafana/grafana:12.3.3
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
volumes:
- ./test/tracing/grafana/provisioning:/etc/grafana/provisioning:ro
ports:
- 3000:3000
depends_on:
loki:
condition: service_healthy
tempo:
condition: service_healthy
9 changes: 9 additions & 0 deletions config/sys.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
[
{limiter, [
%% OTEL log handler configuration
{otel_log_level, info},
{otel_log_max_queue_size, 2048},
{otel_log_scheduled_delay_ms, 1000},
{otel_log_exporting_timeout_ms, 300000},
{ip, "::"},
{port, 8022},
{services, #{
Expand Down Expand Up @@ -106,6 +111,10 @@
{storage, scoper_storage_logger}
]},

{opentelemetry, [
{processors, [{otel_batch_processor, #{scheduled_delay_ms => 1000}}]}
]},

{prometheus, [
{collectors, [default]}
]}
Expand Down
27 changes: 24 additions & 3 deletions rebar.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,29 @@
{scoper, {git, "https://github.com/valitydev/scoper.git", {tag, "v1.1.0"}}},
{woody, {git, "https://github.com/valitydev/woody_erlang.git", {tag, "v1.1.0"}}},
{dmt_client, {git, "https://github.com/valitydev/dmt_client.git", {tag, "v2.0.3"}}},
{opentelemetry_api, "1.4.0"},
{opentelemetry, "1.5.0"},
{opentelemetry_exporter, "1.8.0"}

%% OpenTelemetry deps (otel_log_handler, otel_exporter_logs_otlp from opentelemetry_experimental)
{opentelemetry_api, "1.5.0"},
{opentelemetry, "1.7.0"},
{opentelemetry_exporter,
{git_subdir, "https://github.com/valitydev/opentelemetry-erlang.git",
{branch, "fix/otlp-common-charlist-string"}, "apps/opentelemetry_exporter"}},
{opentelemetry_api_experimental, "0.5.1"},
{opentelemetry_experimental,
{git_subdir, "https://github.com/valitydev/opentelemetry-erlang.git",
{branch, "fix/otlp-common-charlist-string"}, "apps/opentelemetry_experimental"}}
]}.

%% opentelemetry_experimental из git требует ~> 0.5.2 и старые otel (1.4/1.5),
%% но opentelemetry_exporter требует 1.5.0/1.7.0. Переопределяем на новые версии.
{overrides, [
{override, opentelemetry_experimental, [
{deps, [
{opentelemetry_api, "1.5.0"},
{opentelemetry, "1.7.0"},
{opentelemetry_api_experimental, "0.5.1"}
]}
]}
]}.

%% XRef checks
Expand Down Expand Up @@ -76,6 +96,7 @@
{runtime_tools, load},
{tools, load},
{opentelemetry, temporary},
{opentelemetry_experimental, load},
{logger_logstash_formatter, load},
prometheus,
prometheus_cowboy,
Expand Down
Loading
Loading