Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 42 additions & 6 deletions keepsorted/line_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup {
// Returns another boolean indicating whether the group should be ending
// after that line if so.
shouldAddToRegexDelimitedGroup := func(l string) (addToGroup bool, finishGroupAfter bool) {
if metadata.opts.GroupStartRegex != nil {
if metadata.opts.GroupStartRegex != nil {
// For GroupStartRegex, all non-regex-matching lines should be
// part of the group including prior lines.
return !matchesAnyRegex(l, metadata.opts.GroupStartRegex), false
Expand Down Expand Up @@ -349,7 +349,7 @@ func (cb *codeBlock) append(s string, opts blockOptions) {
cb.braceCounts = make(map[string]int)
}

// TODO(jfalgout): Does this need to handle runes more correctly?
// TODO: jfaer - Does this need to handle runes more correctly?
for i := 0; i < len(s); {
if cb.expectedQuote == "" {
// We do not appear to be inside a string literal.
Expand Down Expand Up @@ -419,8 +419,18 @@ func (lg *lineGroup) commentOnly() bool {
}

func (lg *lineGroup) regexTokens() []regexToken {
// TODO: jfaer - Should we match regexes on the original content?
regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines())
var regexMatches []regexMatch
if len(lg.opts.ByRegex) == 0 {
// We apply other options on top of what the regex extracts, but if the user
// didn't set by_regex, we should fall back to the behavior they would've
// expected before we started supporting by_regex.
// Namely: We would apply the other options on top of the
// internalJoinedLines() instead of the raw content.
regexMatches = []regexMatch{{lg.internalJoinedLines()}}
} else {
regexMatches = lg.opts.matchRegexes(lg.regexJoinedLines())
}

ret := make([]regexToken, len(regexMatches))
if lg.access.regexTokens == nil {
lg.access.regexTokens = make([]regexTokenAccessRecorder, len(regexMatches))
Expand Down Expand Up @@ -453,8 +463,15 @@ func (lg *lineGroup) regexTokens() []regexToken {
return ret
}

// internalJoinedLines calculates the same thing as joinedLines, except it
// doesn't record that it was used in the accessRecorder.
// internalJoinedLines attempts to concatenate all of this lineGroup's content
// the same way a reasonable human might.
//
// If the previous line ends with a "word" character and the current line starts
// with a "word" character, the two lines will be separated by a space.
// Otherwise, the lines are concatenated without any separation.
//
// Unlike joinedLines, this method does not record that it was used in the
// accessRecorder.
func (lg *lineGroup) internalJoinedLines() string {
if len(lg.lines) == 0 {
return ""
Expand All @@ -475,6 +492,25 @@ func (lg *lineGroup) internalJoinedLines() string {
return s.String()
}

// regexJoinedLines concatenates all of this lineGroup's content in a way that's
// friendlier to regexes than internalJoinedLines.
//
// Primarily, this method still strips leading whitespace but it uses a real
// newline character to separate lines instead of the "word" character logic of
// internalJoinedLines.
func (lg *lineGroup) regexJoinedLines() string {
if len(lg.lines) == 0 {
return ""
}
lines := make([]string, len(lg.lines))
for i, l := range lg.lines {
lines[i] = strings.TrimLeftFunc(l, unicode.IsSpace)
}
return strings.Join(lines, "\n")
}

// joinedLines returns internalJoinedLines and records that it was called in the
// accessRecorder.
func (lg *lineGroup) joinedLines() string {
lg.access.joinedLines = true
return lg.internalJoinedLines()
Expand Down
17 changes: 14 additions & 3 deletions keepsorted/options_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ func (p *parser) popIntOrBool() (IntOrBool, error) {
func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
switch node.Tag {
case "!!str":
pat, err := regexp.Compile(node.Value)
pat, err := compileByRegex(node.Value)
if err != nil {
return err
}
Expand All @@ -141,7 +141,7 @@ func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error {
return fmt.Errorf("by_regex map item must have exactly one key-value pair, but got %d", len(m))
}
for pattern, template := range m {
pat, err := regexp.Compile(pattern)
pat, err := compileByRegex(pattern)
if err != nil {
return fmt.Errorf("invalid regex pattern %q: %w", pattern, err)
}
Expand Down Expand Up @@ -191,7 +191,7 @@ func (p *parser) popList() ([]string, error) {

func (p *parser) popByRegexOption() ([]ByRegexOption, error) {
return popListValue(p, func(s string) (ByRegexOption, error) {
pat, err := regexp.Compile(s)
pat, err := compileByRegex(s)
return ByRegexOption{Pattern: pat}, err
})
}
Expand Down Expand Up @@ -321,3 +321,14 @@ func (iter *runeIter) pop() (rune, bool) {
}
return ch, ok
}

func compileByRegex(re string) (*regexp.Regexp, error) {
if !strings.HasPrefix(re, "(?s)") && !strings.HasPrefix(re, "(?-s)") {
// The initial version of by_regex ran on top of lineGroup.joinedLines. This
// meant that users wrote regexes that didn't need to handle newlines.
// To minimize disruption, we automatically set dotall flag so that their
// regexes might still work after we start using real newlines.
re = "(?s)" + re
}
return regexp.Compile(re)
}
16 changes: 8 additions & 8 deletions keepsorted/options_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,17 @@ func TestPopValue(t *testing.T) {
name: "Regex",

input: ".*",
want: []ByRegexOption{{regexp.MustCompile(".*"), nil}},
want: []ByRegexOption{{regexp.MustCompile("(?s).*"), nil}},
},
{
name: "MultipleRegex",

input: `[.*, abcd, '(?:efgh)ijkl']`,
allowYAMLList: true,
want: []ByRegexOption{
{regexp.MustCompile(".*"), nil},
{regexp.MustCompile("abcd"), nil},
{regexp.MustCompile("(?:efgh)ijkl"), nil},
{regexp.MustCompile("(?s).*"), nil},
{regexp.MustCompile("(?s)abcd"), nil},
{regexp.MustCompile("(?s)(?:efgh)ijkl"), nil},
},
},
{
Expand All @@ -237,10 +237,10 @@ func TestPopValue(t *testing.T) {
input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`,
allowYAMLList: true,
want: []ByRegexOption{
{regexp.MustCompile(".*"), nil},
{regexp.MustCompile("Mon"), &([]string{"0"})[0]},
{regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},
{regexp.MustCompile(`0: 1`), &([]string{"2"})[0]},
{regexp.MustCompile("(?s).*"), nil},
{regexp.MustCompile("(?s)Mon"), &([]string{"0"})[0]},
{regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]},
{regexp.MustCompile(`(?s)0: 1`), &([]string{"2"})[0]},
},
},
{
Expand Down
11 changes: 6 additions & 5 deletions keepsorted/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil},
{regexp.MustCompile("(?s)(?:abcd)"), nil},
{regexp.MustCompile("(?s)efg.*"), nil},
},
},
},
Expand All @@ -206,8 +207,8 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{Pattern: regexp.MustCompile(`.*`)},
{Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
{Pattern: regexp.MustCompile(`(?s).*`)},
{Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`),
Template: &[]string{"${3}-${1}-${2}"}[0]},
},
},
Expand All @@ -220,8 +221,8 @@ func TestBlockOptions(t *testing.T) {
want: blockOptions{
AllowYAMLLists: true,
ByRegex: []ByRegexOption{
{Pattern: regexp.MustCompile(`foo, bar`)},
{Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`),
{Pattern: regexp.MustCompile(`(?s)foo, bar`)},
{Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`),
Template: &[]string{"${3}-${1}-${2}"}[0]},
},
},
Expand Down