Skip to content

Commit c7729df

Browse files
author
José Valim
committed
Add Regex.recompile/1 and Regex.recompile!/1
1 parent 61bb6c1 commit c7729df

File tree

3 files changed

+145
-26
lines changed

3 files changed

+145
-26
lines changed

lib/elixir/lib/regex.ex

Lines changed: 91 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,52 @@
11
defmodule Regex do
22
@moduledoc ~S"""
3-
Provides regular expressions for Elixir. Built on top of Erlang's `:re`
4-
module.
3+
Provides regular expressions for Elixir.
54
6-
As the `:re` module, Regex is based on PCRE
7-
(Perl Compatible Regular Expressions). More information can be
8-
found in the [`:re` module documentation](http://www.erlang.org/doc/man/re.html).
5+
Regex is based on PCRE (Perl Compatible Regular Expressions) and
6+
built on top of Erlang's `:re` module. More information can be found
7+
in the [`:re` module documentation](http://www.erlang.org/doc/man/re.html).
98
10-
Regular expressions in Elixir can be created using `Regex.compile!/2`
11-
or using the special form with [`~r`](Kernel.html#sigil_r/2) or [`~R`](Kernel.html#sigil_R/2):
9+
Regular expressions in Elixir can be created using the sigils
10+
[`~r`](Kernel.html#sigil_r/2) or [`~R`](Kernel.html#sigil_R/2):
1211
1312
# A simple regular expression that matches foo anywhere in the string
1413
~r/foo/
1514
1615
# A regular expression with case insensitive and Unicode options
1716
~r/foo/iu
1817
18+
Regular expressions created via sigils are pre-compiled and stored
19+
in the `.beam` file. Notice this may be a problem if you are precompiling
20+
Elixir, see the "Precompilation" section for more information.
21+
1922
A Regex is represented internally as the `Regex` struct. Therefore,
2023
`%Regex{}` can be used whenever there is a need to match on them.
2124
25+
Keep in mind it is not guaranteed two regular expressions from the
26+
same source are equal, for example:
27+
28+
~r/(?<foo>.)(?<bar>.)/ == ~r/(?<foo>.)(?<bar>.)/
29+
30+
may return `true` or `false` depending on your machine, endianness,
31+
available optimizations and others. You can, however, retrieve the source
32+
of a compiled regular expression by accessing the `source` field, and then
33+
compare those directly:
34+
35+
~r/(?<foo>.)(?<bar>.)/.source == ~r/(?<foo>.)(?<bar>.)/.source
36+
37+
## Precompilation
38+
39+
Regular expressions built with sigils are precompiled and stored in `.beam`
40+
files. This may be a problem if you are precompiling Elixir to run in
41+
different OTP releases, as OTP releases may update the underlying regular
42+
expression engine at any time.
43+
44+
For such reasons, we always recommend precompiling Elixir projects using
45+
the OTP version meant to run in production. In case cross-compilation is
46+
really necessary, you can manually invoke `Regex.recompile/1` or
47+
`Regex.recompile!/1` to perform a runtime version check and recompile the regex
48+
if necessary.
49+
2250
## Modifiers
2351
2452
The modifiers available when creating a Regex are:
@@ -78,7 +106,7 @@ defmodule Regex do
78106
79107
"""
80108

81-
defstruct re_pattern: nil, source: "", opts: ""
109+
defstruct re_pattern: nil, source: "", opts: "", re_version: ""
82110

83111
@type t :: %__MODULE__{re_pattern: term, source: binary, opts: binary}
84112

@@ -91,7 +119,7 @@ defmodule Regex do
91119
92120
The given options can either be a binary with the characters
93121
representing the same regex options given to the `~r` sigil,
94-
or a list of options, as expected by the Erlang's [`:re` module](http://www.erlang.org/doc/man/re.html).
122+
or a list of options, as expected by the Erlang's `:re` module.
95123
96124
It returns `{:ok, regex}` in case of success,
97125
`{:error, reason}` otherwise.
@@ -106,40 +134,84 @@ defmodule Regex do
106134
107135
"""
108136
@spec compile(binary, binary | [term]) :: {:ok, t} | {:error, any}
109-
def compile(source, options \\ "")
137+
def compile(source, options \\ "") do
138+
compile(source, options, version())
139+
end
110140

111-
def compile(source, options) when is_binary(options) do
141+
defp compile(source, options, version) when is_binary(options) do
112142
case translate_options(options, []) do
113143
{:error, rest} ->
114144
{:error, {:invalid_option, rest}}
115145

116146
translated_options ->
117-
compile(source, translated_options, options)
147+
compile(source, translated_options, options, version)
118148
end
119149
end
120150

121-
def compile(source, options) when is_list(options) do
122-
compile(source, options, "")
151+
defp compile(source, options, version) when is_list(options) do
152+
compile(source, options, "", version)
123153
end
124154

125-
defp compile(source, opts, doc_opts) when is_binary(source) do
155+
defp compile(source, opts, doc_opts, version) when is_binary(source) do
126156
case :re.compile(source, opts) do
127157
{:ok, re_pattern} ->
128-
{:ok, %Regex{re_pattern: re_pattern, source: source, opts: doc_opts}}
158+
{:ok, %Regex{re_pattern: re_pattern, re_version: version, source: source, opts: doc_opts}}
129159
error ->
130160
error
131161
end
132162
end
133163

134164
@doc """
135-
Compiles the regular expression according to the given options.
136-
Fails with `Regex.CompileError` if the regex cannot be compiled.
165+
Compiles the regular expression and raises `Regex.CompileError` in case of errors.
137166
"""
138167
@spec compile!(binary, binary | [term]) :: t
139168
def compile!(source, options \\ "") do
140169
case compile(source, options) do
141170
{:ok, regex} -> regex
142-
{:error, {reason, at}} -> raise Regex.CompileError, message: "#{reason} at position #{at}"
171+
{:error, {reason, at}} -> raise Regex.CompileError, "#{reason} at position #{at}"
172+
end
173+
end
174+
175+
@doc """
176+
Recompiles the existing regular expression if necessary.
177+
178+
This checks the version stored in the regular expression
179+
and recompiles the regex in case of version mismatch.
180+
"""
181+
@spec recompile(t) :: t
182+
def recompile(%Regex{} = regex) do
183+
version = version()
184+
185+
# We use Map.get/3 by choice to support old regex versions.
186+
case Map.get(regex, :re_version, :error) do
187+
^version ->
188+
{:ok, regex}
189+
_ ->
190+
%{source: source, opts: opts} = regex
191+
compile(source, opts, version)
192+
end
193+
end
194+
195+
@doc """
196+
Recompiles the existing regular expression and raises `Regex.CompileError` in case of errors.
197+
"""
198+
@spec recompile!(t) :: t
199+
def recompile!(regex) do
200+
case recompile(regex) do
201+
{:ok, regex} -> regex
202+
{:error, {reason, at}} -> raise Regex.CompileError, "#{reason} at position #{at}"
203+
end
204+
end
205+
206+
@doc """
207+
Returns the version of the underlying Regex engine.
208+
"""
209+
# TODO: No longer check for function_exported? on OTP 20+.
210+
def version do
211+
if function_exported?(:re, :version, 0) do
212+
:re.version()
213+
else
214+
"8.33 2013-05-29"
143215
end
144216
end
145217

lib/elixir/src/elixir.erl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ start(_Type, _Args) ->
4747
erlang:halt(1)
4848
end,
4949

50+
%% We need to make sure the re module is preloaded
51+
%% to make function_exported checks on it fast.
52+
%% TODO: Remove this once we support OTP 20+.
53+
_ = code:ensure_loaded(re),
54+
5055
case code:ensure_loaded(?system) of
5156
{module, ?system} ->
5257
Endianness = ?system:endianness(),

lib/elixir/test/elixir/regex_test.exs

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,55 @@ defmodule RegexTest do
9191
refute <<?<, 255, ?>>> =~ ~r/<.>/u
9292
end
9393

94-
test "names" do
94+
test "ungreedy" do
95+
assert Regex.run(~r/[\d ]+/, "1 2 3 4 5"), ["1 2 3 4 5"]
96+
assert Regex.run(~r/[\d ]?+/, "1 2 3 4 5"), ["1"]
97+
assert Regex.run(~r/[\d ]+/U, "1 2 3 4 5"), ["1"]
98+
end
99+
100+
test "regex?/1" do
101+
assert Regex.regex?(~r/foo/)
102+
refute Regex.regex?(0)
103+
end
104+
105+
test "compile/1" do
106+
{:ok, regex} = Regex.compile("foo")
107+
assert Regex.regex?(regex)
108+
assert {:error, _} = Regex.compile("*foo")
109+
assert {:error, _} = Regex.compile("foo", "y")
110+
assert {:error, _} = Regex.compile("foo", "uy")
111+
end
112+
113+
test "compile/1 with Erlang options" do
114+
{:ok, regex} = Regex.compile("foo\\sbar", [:dotall, {:newline, :anycrlf}])
115+
assert "foo\nbar" =~ regex
116+
end
117+
118+
test "compile!/1" do
119+
assert Regex.regex?(Regex.compile!("foo"))
120+
121+
assert_raise Regex.CompileError, ~r/position 0$/, fn ->
122+
Regex.compile!("*foo")
123+
end
124+
end
125+
126+
test "recompile/1" do
127+
new_regex = ~r/foo/
128+
{:ok, regex} = Regex.recompile(new_regex)
129+
assert Regex.regex?(regex)
130+
assert Regex.regex?(Regex.recompile!(new_regex))
131+
132+
old_regex = Map.delete(~r/foo/, :re_version)
133+
{:ok, regex} = Regex.recompile(old_regex)
134+
assert Regex.regex?(regex)
135+
assert Regex.regex?(Regex.recompile!(old_regex))
136+
end
137+
138+
test "opts/1" do
139+
assert Regex.opts(Regex.compile!("foo", "i")) == "i"
140+
end
141+
142+
test "names/1" do
95143
assert Regex.names(~r/(?<FOO>foo)/) == ["FOO"]
96144
end
97145

@@ -219,12 +267,6 @@ defmodule RegexTest do
219267
assert Regex.replace(~r[a(b)c], "abcabc", fn "abc", "b" -> "ac" end, global: false) == "acabc"
220268
end
221269

222-
test "ungreedy" do
223-
assert Regex.run(~r/[\d ]+/, "1 2 3 4 5"), ["1 2 3 4 5"]
224-
assert Regex.run(~r/[\d ]?+/, "1 2 3 4 5"), ["1"]
225-
assert Regex.run(~r/[\d ]+/U, "1 2 3 4 5"), ["1"]
226-
end
227-
228270
test "escape" do
229271
assert matches_escaped?(".")
230272
refute matches_escaped?(".", "x")

0 commit comments

Comments
 (0)