Skip to content

Commit bd7b484

Browse files
author
José Valim
committed
Speed up String.split/1
1 parent 75e2ee0 commit bd7b484

File tree

2 files changed

+7
-31
lines changed

2 files changed

+7
-31
lines changed

lib/elixir/lib/string.ex

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,7 @@ defmodule String do
180180
## Patterns
181181
182182
Many functions in this module work with patterns. For example,
183-
String.split/2 can split a string into multiple patterns given
183+
`String.split/2` can split a string into multiple strings given
184184
a pattern. This pattern can be a string, a list of strings or
185185
a compiled pattern:
186186

lib/elixir/unicode/unicode.ex

Lines changed: 6 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -361,7 +361,6 @@ defmodule String.Break do
361361
@moduledoc false
362362
@whitespace_max_size 3
363363

364-
# WhiteSpace.txt is extracted from Unicode's PropList.txt (just the White_Space property)
365364
prop_path = Path.join(__DIR__, "WhiteSpace.txt")
366365

367366
whitespace = Enum.reduce File.stream!(prop_path), [], fn line, acc ->
@@ -377,14 +376,10 @@ defmodule String.Break do
377376

378377
# trim_leading
379378

380-
def trim_leading(""), do: ""
381-
382379
for codepoint <- whitespace do
383-
def trim_leading(unquote(codepoint) <> rest) do
384-
trim_leading(rest)
385-
end
380+
def trim_leading(unquote(codepoint) <> rest), do: trim_leading(rest)
386381
end
387-
382+
def trim_leading(""), do: ""
388383
def trim_leading(string) when is_binary(string), do: string
389384

390385
# trim_trailing
@@ -430,31 +425,12 @@ defmodule String.Break do
430425

431426
# Split
432427

433-
def split(""), do: []
434-
435-
def split(string) when is_binary(string) do
436-
:lists.reverse do_split(string, "", [])
428+
def split(string) do
429+
for piece <- :binary.split(string, unquote(whitespace -- non_breakable), [:global]),
430+
piece != "",
431+
do: piece
437432
end
438433

439-
for codepoint <- whitespace -- non_breakable do
440-
defp do_split(unquote(codepoint) <> rest, buffer, acc) do
441-
do_split(rest, "", add_buffer_to_acc(buffer, acc))
442-
end
443-
end
444-
445-
defp do_split(<<char, rest::binary>>, buffer, acc) do
446-
do_split(rest, <<buffer::binary, char>>, acc)
447-
end
448-
449-
defp do_split(<<>>, buffer, acc) do
450-
add_buffer_to_acc(buffer, acc)
451-
end
452-
453-
@compile {:inline, add_buffer_to_acc: 2}
454-
455-
defp add_buffer_to_acc("", acc), do: acc
456-
defp add_buffer_to_acc(buffer, acc), do: [buffer | acc]
457-
458434
# Decompose
459435

460436
def decompose(entries, map) do

0 commit comments

Comments
 (0)