@@ -20,9 +20,9 @@ defmodule String.Unicode do
2020 << first :: 4 - bytes , ".." , last :: 4 - bytes , _ :: binary >> ->
2121 first = String . to_integer ( first , 16 )
2222 last = String . to_integer ( last , 16 )
23- Enum . map ( first .. last , & to_binary . ( Integer . to_string ( & 1 , 16 ) ) ) ++ acc
23+ Enum . map ( first .. last , fn int -> << int :: utf8 >> end ) ++ acc
2424 << single :: 4 - bytes , _ :: binary >> ->
25- [ to_binary . ( single ) | acc ]
25+ [ << String . to_integer ( single , 16 ) :: utf8 >> | acc ]
2626 end
2727 end
2828
@@ -222,24 +222,21 @@ defmodule String.Graphemes do
222222 cluster_path = Path . join ( __DIR__ , "GraphemeBreakProperty.txt" )
223223 regex = ~r/ (?:^([0-9A-F]+)(?:\. \. ([0-9A-F]+))?)\s +;\s (\w +)/ m
224224
225- to_range = fn
226- first , "" ->
227- [ << String . to_integer ( first , 16 ) :: utf8 >> ]
228- first , last ->
229- range = String . to_integer ( first , 16 ) .. String . to_integer ( last , 16 )
230- Enum . map ( range , fn ( int ) -> << int :: utf8 >> end )
231- end
232-
233225 cluster = Enum . reduce File . stream! ( cluster_path ) , % { } , fn ( line , dict ) ->
234226 [ _full , first , last , class ] = Regex . run ( regex , line )
235227
236- # Skip surrogates
237- if first == "D800" and last == "DFFF" do
238- dict
239- else
240- list = to_range . ( first , last )
241- Map . update ( dict , class , list , & ( & 1 ++ list ) )
242- end
228+ codepoints =
229+ case { first , last } do
230+ { "D800" , "DFFF" } ->
231+ [ ]
232+ { first , "" } ->
233+ [ << String . to_integer ( first , 16 ) :: utf8 >> ]
234+ { first , last } ->
235+ range = String . to_integer ( first , 16 ) .. String . to_integer ( last , 16 )
236+ Enum . map ( range , fn int -> << int :: utf8 >> end )
237+ end
238+
239+ Map . update ( dict , class , codepoints , & ( & 1 ++ codepoints ) )
243240 end
244241
245242 # There is no codepoint marked as Prepend by Unicode 6.3.0
0 commit comments