Skip to content

Commit c4e3f80

Browse files
author
Daniel Rubery
committed
Adding user-defined group delimiters
This addresses #97. The discussion there suggested trying to override line breaks, but this turns out to be difficult: the options are not specified until after we've split the input into lines, and lots of places use line numbers for fixes. Instead, we allow customization of how lines are associated into groups. To support the TOML use case (end a group on a blank line) and the SQL use case (end a group when a line ends in a semicolon), we add a new list of regexes, group_delimiter_regexes, that determines whether a line should end a group. There's a subtlety to precedence with sticky_prefixes. Since lines starting with one of the sticky_prefixes should associate with the group below them, the first line with a sticky prefix can create a new group, despite what the group_delimiter_regexes say. We allow sticky_prefixes to take precedence.
1 parent 975d9a8 commit c4e3f80

8 files changed

Lines changed: 137 additions & 23 deletions

goldens/group_delimiter_regexes.in

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Sort by consecutive newlines:
2+
// keep-sorted-test start group_delimiter_regexes=['^$']
3+
b-block
4+
attached line 1
5+
attached line 2
6+
7+
a-block
8+
attached line 3
9+
attached line 4
10+
// keep-sorted-test end
11+
12+
Sort by semicolon:
13+
// keep-sorted-test start group_delimiter_regexes=[';$']
14+
XXX a semicolon ; in the middle is ignored
15+
But at the end it terminates the group ;
16+
17+
ZZZ This group will come after YYY;
18+
19+
YYY will be sorted up;
20+
// keep-sorted-test end
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
Sort by consecutive newlines:
2+
// keep-sorted-test start group_delimiter_regexes=['^$']
3+
a-block
4+
attached line 3
5+
attached line 4
6+
b-block
7+
attached line 1
8+
attached line 2
9+
10+
// keep-sorted-test end
11+
12+
Sort by semicolon:
13+
// keep-sorted-test start group_delimiter_regexes=[';$']
14+
XXX a semicolon ; in the middle is ignored
15+
But at the end it terminates the group ;
16+
17+
YYY will be sorted up;
18+
19+
ZZZ This group will come after YYY;
20+
// keep-sorted-test end

keepsorted/keep_sorted_test.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package keepsorted
1616

1717
import (
18+
"regexp"
1819
"strings"
1920
"testing"
2021

@@ -1632,6 +1633,52 @@ func TestLineGrouping(t *testing.T) {
16321633
`"""`}},
16331634
},
16341635
},
1636+
{
1637+
name: "GroupDelimiter_BlankLine",
1638+
opts: blockOptions{
1639+
GroupDelimiterRegexes: []RegexOption{
1640+
{Pattern: regexp.MustCompile(`^$`)},
1641+
},
1642+
},
1643+
1644+
want: []lineGroupContent{
1645+
{lines: []string{
1646+
"[toml]",
1647+
"key=value",
1648+
"",
1649+
}},
1650+
{lines: []string{
1651+
"[block_two]",
1652+
"key=value",
1653+
"",
1654+
}},
1655+
{lines: []string{
1656+
"[block_three]",
1657+
"final_key=value",
1658+
}},
1659+
},
1660+
},
1661+
{
1662+
name: "GroupDelimiter_Semicolon",
1663+
opts: blockOptions{
1664+
GroupDelimiterRegexes: []RegexOption{
1665+
{Pattern: regexp.MustCompile(`;$`)},
1666+
},
1667+
},
1668+
1669+
want: []lineGroupContent{
1670+
{lines: []string{
1671+
"statement ; in middle",
1672+
"semicolon at end;",
1673+
}},
1674+
{lines: []string{
1675+
"Next paragraph;",
1676+
}},
1677+
{lines: []string{
1678+
"And the final one",
1679+
}},
1680+
},
1681+
},
16351682
} {
16361683
t.Run(tc.name, func(t *testing.T) {
16371684
initZerolog(t)

keepsorted/line_group.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,17 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
149149
} else {
150150
commentRange.append(i)
151151
}
152+
} else if len(metadata.opts.GroupDelimiterRegexes) != 0 {
153+
appendLine(i, l)
154+
for _, match := range metadata.opts.matchRegexes(l, metadata.opts.GroupDelimiterRegexes) {
155+
if match == nil {
156+
continue
157+
}
158+
if !lineRange.empty() {
159+
finishGroup()
160+
}
161+
break
162+
}
152163
} else {
153164
if !lineRange.empty() {
154165
finishGroup()
@@ -361,7 +372,7 @@ func (lg *lineGroup) commentOnly() bool {
361372

362373
func (lg *lineGroup) regexTokens() []regexToken {
363374
// TODO: jfaer - Should we match regexes on the original content?
364-
regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines())
375+
regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines(), lg.opts.ByRegex)
365376
ret := make([]regexToken, len(regexMatches))
366377
if lg.access.regexTokens == nil {
367378
lg.access.regexTokens = make([]regexTokenAccessRecorder, len(regexMatches))

keepsorted/options.go

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import (
3535
// true is unmarshaled as 1, false as 0.
3636
type IntOrBool int
3737

38-
type ByRegexOption struct {
38+
type RegexOption struct {
3939
Pattern *regexp.Regexp
4040
Template *string
4141
}
@@ -67,7 +67,7 @@ func (opts BlockOptions) String() string {
6767
// - []string: key=a,b,c,d
6868
// - map[string]bool: key=a,b,c,d
6969
// - int: key=123
70-
// - ByRegexOptions key=a,b,c,d, key=[yaml_list]
70+
// - []RegexOption: key=a,b,c,d, key=[yaml_list]
7171
type blockOptions struct {
7272
// AllowYAMLLists determines whether list.set valued options are allowed to be specified by YAML.
7373
AllowYAMLLists bool `key:"allow_yaml_lists"`
@@ -88,6 +88,8 @@ type blockOptions struct {
8888
StickyComments bool `key:"sticky_comments"`
8989
// StickyPrefixes tells us about other types of lines that should behave as sticky comments.
9090
StickyPrefixes map[string]bool `key:"sticky_prefixes"`
91+
// GroupDelimiterRegexes tells us if a line is allowed to end a group.
92+
GroupDelimiterRegexes []RegexOption `key:"group_delimiter_regexes"`
9193

9294
///////////////////////
9395
// Sorting options //
@@ -102,7 +104,7 @@ type blockOptions struct {
102104
// IgnorePrefixes is a slice of prefixes that we do not consider when sorting lines.
103105
IgnorePrefixes []string `key:"ignore_prefixes"`
104106
// ByRegex is a slice of regexes that are used to extract the pieces of the line group that keep-sorted should sort by.
105-
ByRegex []ByRegexOption `key:"by_regex"`
107+
ByRegex []RegexOption `key:"by_regex"`
106108

107109
////////////////////////////
108110
// Post-sorting options //
@@ -210,8 +212,8 @@ func formatValue(val reflect.Value) (string, error) {
210212
return strconv.Itoa(int(val.Int())), nil
211213
case reflect.TypeFor[int]():
212214
return strconv.Itoa(int(val.Int())), nil
213-
case reflect.TypeFor[[]ByRegexOption]():
214-
opts := val.Interface().([]ByRegexOption)
215+
case reflect.TypeFor[[]RegexOption]():
216+
opts := val.Interface().([]RegexOption)
215217
vals := make([]string, 0, len(opts))
216218
seenTemplate := false
217219
for _, opt := range opts {
@@ -390,20 +392,20 @@ func (opts blockOptions) trimIgnorePrefix(s string) string {
390392
return s
391393
}
392394

393-
// matchRegexes applies ByRegex to s.
394-
// If ByRegex is empty, returns a slice that contains just s.
395+
// matchRegexes applies regexes to s.
396+
// If regexes is empty, returns a slice that contains just s.
395397
// Otherwise, applies each regex to s in sequence:
396398
// If a regex has capturing groups, the capturing groups will be added to the
397399
// resulting slice.
398400
// If a regex does not have capturing groups, all matched text will be added to
399401
// the resulting slice.
400-
func (opts blockOptions) matchRegexes(s string) []regexMatch {
401-
if len(opts.ByRegex) == 0 {
402+
func (opts blockOptions) matchRegexes(s string, regexes []RegexOption) []regexMatch {
403+
if len(regexes) == 0 {
402404
return []regexMatch{{s}}
403405
}
404406

405407
var ret []regexMatch
406-
for _, p := range opts.ByRegex {
408+
for _, p := range regexes {
407409
regex := p.Pattern
408410

409411
if p.Template != nil {
@@ -421,6 +423,7 @@ func (opts blockOptions) matchRegexes(s string) []regexMatch {
421423
}
422424

423425
m := regex.FindStringSubmatch(s)
426+
424427
if m == nil {
425428
ret = append(ret, regexDidNotMatch)
426429
continue

keepsorted/options_parser.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func (p *parser) popValue(typ reflect.Type) (reflect.Value, error) {
6565
case reflect.TypeFor[map[string]bool]():
6666
val, err := p.popSet()
6767
return reflect.ValueOf(val), err
68-
case reflect.TypeFor[[]ByRegexOption]():
68+
case reflect.TypeFor[[]RegexOption]():
6969
val, err := p.popListRegexOption()
7070
if err != nil {
7171
return reflect.Zero(typ), err
@@ -113,7 +113,7 @@ func (p *parser) popIntOrBool() (IntOrBool, error) {
113113
return IntOrBool(i), nil
114114
}
115115

116-
func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
116+
func (ar *RegexOption) UnmarshalYAML(node *yaml.Node) error {
117117
switch node.Tag {
118118
case "!!str":
119119
pat, err := regexp.Compile(node.Value)
@@ -180,10 +180,10 @@ func (p *parser) popList() ([]string, error) {
180180
return popListValue(p, func(s string) (string, error) { return s, nil })
181181
}
182182

183-
func (p *parser) popListRegexOption() ([]ByRegexOption, error) {
184-
return popListValue(p, func(s string) (ByRegexOption, error) {
183+
func (p *parser) popListRegexOption() ([]RegexOption, error) {
184+
return popListValue(p, func(s string) (RegexOption, error) {
185185
pat, err := regexp.Compile(s)
186-
return ByRegexOption{Pattern: pat}, err
186+
return RegexOption{Pattern: pat}, err
187187
})
188188
}
189189

keepsorted/options_parser_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -218,14 +218,14 @@ func TestPopValue(t *testing.T) {
218218
name: "Regex",
219219

220220
input: ".*",
221-
want: []ByRegexOption{{regexp.MustCompile(".*"), nil}},
221+
want: []RegexOption{{regexp.MustCompile(".*"), nil}},
222222
},
223223
{
224224
name: "MultipleRegex",
225225

226226
input: `[.*, abcd, '(?:efgh)ijkl']`,
227227
allowYAMLList: true,
228-
want: []ByRegexOption{
228+
want: []RegexOption{
229229
{regexp.MustCompile(".*"), nil},
230230
{regexp.MustCompile("abcd"), nil},
231231
{regexp.MustCompile("(?:efgh)ijkl"), nil},
@@ -236,7 +236,7 @@ func TestPopValue(t *testing.T) {
236236

237237
input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`,
238238
allowYAMLList: true,
239-
want: []ByRegexOption{
239+
want: []RegexOption{
240240
{regexp.MustCompile(".*"), nil},
241241
{regexp.MustCompile("Mon"), &([]string{"0"})[0]},
242242
{regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},

keepsorted/options_test.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func TestBlockOptions(t *testing.T) {
193193

194194
want: blockOptions{
195195
AllowYAMLLists: true,
196-
ByRegex: []ByRegexOption{
196+
ByRegex: []RegexOption{
197197
{regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil},
198198
},
199199
},
@@ -205,13 +205,26 @@ func TestBlockOptions(t *testing.T) {
205205

206206
want: blockOptions{
207207
AllowYAMLLists: true,
208-
ByRegex: []ByRegexOption{
208+
ByRegex: []RegexOption{
209209
{Pattern: regexp.MustCompile(`.*`)},
210210
{Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
211211
Template: &[]string{"${3}-${1}-${2}"}[0]},
212212
},
213213
},
214214
},
215+
{
216+
name: "GroupDelimiterRegexes",
217+
in: `group_delimiter_regexes=['^$', ';$']`,
218+
defaultOptions: blockOptions{AllowYAMLLists: true},
219+
220+
want: blockOptions{
221+
AllowYAMLLists: true,
222+
GroupDelimiterRegexes: []RegexOption{
223+
{Pattern: regexp.MustCompile(`^$`)},
224+
{Pattern: regexp.MustCompile(`;$`)},
225+
},
226+
},
227+
},
215228
} {
216229
t.Run(tc.name, func(t *testing.T) {
217230
initZerolog(t)
@@ -325,10 +338,10 @@ func TestBlockOptions_regexTransform(t *testing.T) {
325338
t.Run(tc.name, func(t *testing.T) {
326339
var opts blockOptions
327340
for _, regex := range tc.regexes {
328-
opts.ByRegex = append(opts.ByRegex, ByRegexOption{regexp.MustCompile(regex), nil})
341+
opts.ByRegex = append(opts.ByRegex, RegexOption{regexp.MustCompile(regex), nil})
329342
}
330343

331-
gotTokens := opts.matchRegexes(tc.in)
344+
gotTokens := opts.matchRegexes(tc.in, opts.ByRegex)
332345
got := make([][]string, len(gotTokens))
333346
for i, t := range gotTokens {
334347
got[i] = []string(t)

0 commit comments

Comments
 (0)