Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
141 changes: 74 additions & 67 deletions go.mod

Large diffs are not rendered by default.

311 changes: 163 additions & 148 deletions go.sum

Large diffs are not rendered by default.

152 changes: 151 additions & 1 deletion pkg/controller/build/buildrequest/buildrequest.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@ import (
_ "embed"
"encoding/json"
"fmt"
"regexp"
"strings"
"text/template"

mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
command "github.com/openshift/imagebuilder/dockerfile/command"
parser "github.com/openshift/imagebuilder/dockerfile/parser"
"github.com/openshift/machine-config-operator/pkg/controller/build/constants"
"github.com/openshift/machine-config-operator/pkg/controller/build/utils"
chelpers "github.com/openshift/machine-config-operator/pkg/controller/common"
Expand Down Expand Up @@ -39,6 +42,29 @@ const (
machineConfigJSONFilename string = "machineconfig.json.gz"
)

var basicSyntaxRegex = regexp.MustCompile(`(?m)(?i)^\s*FROM`)

// instructionRequirements maps Containerfile instructions that MUST have arguments to their requirement descriptions
// Instructions not in this map either don't require arguments or have optional arguments
var instructionRequirements = map[string]string{
command.Add: "source and destination paths",
command.Arg: "a variable name",
command.Cmd: "a command or arguments",
command.Copy: "source and destination paths",
command.Entrypoint: "a command or arguments",
command.Env: "key=value pairs",
command.Expose: "a port",
command.From: "a base image",
command.Label: "key=value pairs",
command.Onbuild: "an instruction to execute",
command.Run: "a command",
command.Shell: "a shell command array",
command.StopSignal: "a signal",
command.User: "a username or UID",
command.Volume: "a mount point",
command.Workdir: "a directory path",
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// Represents the request to build a layered OS image.
type buildRequestImpl struct {
opts BuildRequestOpts
Expand Down Expand Up @@ -69,6 +95,13 @@ func newBuildRequest(opts BuildRequestOpts) BuildRequest {
}
}

// Validate user's Containerfile if provided
if br.userContainerfile != "" {
if err := br.validateContainerfileSyntax(br.userContainerfile); err != nil {
klog.Warningf("User Containerfile validation failed")
}
}

return br
}

Expand Down Expand Up @@ -346,7 +379,124 @@ func (br buildRequestImpl) renderContainerfile() (string, error) {
return "", fmt.Errorf("could not execute containerfile template: %w", err)
}

return out.String(), nil
renderedContainerfile := out.String()

// Validate the rendered Containerfile
if err := br.validateContainerfileSyntax(renderedContainerfile); err != nil {
return "", fmt.Errorf("rendered containerfile validation failed: %w", err)
}

return renderedContainerfile, nil
}

// validateContainerfileSyntax validates the syntax of a Containerfile using basic checks and the imagebuilder parser.
func (br buildRequestImpl) validateContainerfileSyntax(containerfile string) error {
if containerfile == "" {
return fmt.Errorf("containerfile is empty")
}

if err := br.validateBasicSyntax(containerfile); err != nil {
return err
}

result, err := parser.Parse(strings.NewReader(containerfile))
if err != nil {
// For rendered templates, advanced syntax is expected
// Only fail on obvious syntax errors
if !isKnownParserLimitation(err) {
return fmt.Errorf("failed to parse Containerfile: %w", err)
}
// Log but don't fail for advanced syntax
klog.V(4).Infof("Containerfile uses syntax the parser doesn't support, skipping detailed parser validation: %v", err)
return nil
Comment on lines +402 to +411

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

In github.com/openshift/imagebuilder v1.2.21 dockerfile/parser, what inputs produce the errors "can't find = in" and "must be of the form: name=value"? Do malformed ARG/ENV instructions emit those same messages?

💡 Result:

In the Dockerfile parser used by openshift/imagebuilder (which relies on the standard Dockerfile syntax parser), the errors "can't find = in [X]" and "must be of the form: name=value" occur when the parser encounters a line (typically ENV or LABEL) that it expects to follow the key=value format but fails to find the required equals sign within the expected token [1][2][3]. This error is almost always caused by malformed syntax in ENV or LABEL instructions, rather than ARG [1][4]. Common causes include: 1. Unexpected spaces: If you write ENV KEY = VALUE (with a space before the =), the parser treats the = as a separate token and fails to associate it correctly, often reporting the failure on the next word [2][5]. 2. Trailing characters or comments: If you add a comment at the end of an ENV or LABEL line using #, the parser may attempt to treat that comment as part of the instruction. If it encounters a word in the comment that lacks an =, it will fail [1][6]. 3. Unquoted values with special characters: If an environment variable value contains spaces or special characters that are not properly quoted, the parser may break the line into multiple tokens and attempt to interpret them as key=value pairs, eventually failing when it finds a token without an = [7][3][8]. 4. Line continuation issues: If a backslash () is used incorrectly, or if there is trailing whitespace after the backslash, it can disrupt the parser's view of the next line, leading to parsing errors [1][5]. Regarding malformed ARG/ENV instructions: The error message is characteristic of ENV and LABEL instructions, which strictly enforce the key=value syntax (or the older space-separated syntax, which is often source of confusion when mixed) [9][10][1]. ARG instructions behave differently; while they also support key=value, they are generally more lenient or throw different errors (e.g., "ARG requires exactly one argument") when formatted incorrectly [4]. Therefore, malformed ARG instructions do not typically emit these specific "can't find =" messages [4].

Citations:


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="pkg/controller/build/buildrequest/buildrequest.go"

# Show the exact helper + its callsites around the reported lines
sed -n '350,430p' "$FILE" | cat -n

echo "---- search for isKnownAdvancedSyntaxError definition ----"
rg -n "func isKnownAdvancedSyntaxError|isKnownAdvancedSyntaxError\(" "$FILE"

echo "---- show surrounding definition if found ----"
# Print a bit more around the function definition
# (rg line number output used by awk to slice)
line_def=$(rg -n "func isKnownAdvancedSyntaxError" "$FILE" | head -n1 | cut -d: -f1 || true)
if [[ -n "${line_def:-}" ]]; then
  start=$((line_def-40))
  end=$((line_def+80))
  sed -n "${start},${end}p" "$FILE" | cat -n
fi

Repository: openshift/machine-config-operator

Length of output: 7899


🏁 Script executed:

#!/bin/bash
set -euo pipefail
FILE="pkg/controller/build/buildrequest/buildrequest.go"

echo "---- locate instructionRequirements ----"
rg -n "instructionRequirements" "$FILE"

echo "---- show around instructionRequirements ----"
line=$(rg -n "instructionRequirements" "$FILE" | head -n1 | cut -d: -f1)
start=$((line-30)); end=$((line+120))
sed -n "${start},${end}p" "$FILE" | cat -n

echo "---- search for validateInstruction usage / other syntax validation ----"
rg -n "validateContainerfileSyntax|validateBasicSyntax|validateInstruction|validInstructions" "$FILE"

Repository: openshift/machine-config-operator

Length of output: 7771


🏁 Script executed:

#!/bin/bash
set -euo pipefail
FILE="pkg/controller/build/buildrequest/buildrequest.go"

echo "---- inspect validateInstruction fully (incl. instructionRequirements usage) ----"
sed -n '430,560p' "$FILE" | cat -n

echo "---- locate containerfileToConfigMap + renderedContainerfile flow ----"
rg -n "containerfileToConfigMap|renderContainerfile|validateContainerfileSyntax" "$FILE"

echo "---- show around containerfileToConfigMap ----"
line=$(rg -n "func \\(br buildRequestImpl\\) containerfileToConfigMap" "$FILE" | head -n1 | cut -d: -f1)
start=$((line-60)); end=$((line+220))
sed -n "${start},${end}p" "$FILE" | cat -n

echo "---- search for other isKnownAdvancedSyntaxError references ----"
rg -n "isKnownAdvancedSyntaxError" "$FILE"

echo "---- search repo for these exact substrings (any other suppression) ----"
rg -n "can't find = in|must be of the form: name=value" . || true

echo "---- inspect embedded Containerfile templates for heredoc/ENV/LABEL patterns ----"
ls -la pkg/controller/build/buildrequest/assets || true
for f in pkg/controller/build/buildrequest/assets/*; do
  echo "### $f"; sed -n '1,120p' "$f" | cat -n; echo; done

Repository: openshift/machine-config-operator

Length of output: 31915


🏁 Script executed:

#!/bin/bash
set -euo pipefail

# Inspect the exact parser code that emits the two error strings.
FILE1="vendor/github.com/openshift/imagebuilder/dockerfile/parser/line_parsers.go"
FILE2="vendor/github.com/moby/buildkit/frontend/dockerfile/parser/line_parsers.go"

echo "---- openshift/imagebuilder parser: lines around 'can't find = in' ----"
rg -n "can't find = in|must be of the form: name=value" "$FILE1"
line1=$(rg -n "can't find = in" "$FILE1" | head -n1 | cut -d: -f1)
start=$((line1-40)); end=$((line1+40))
sed -n "${start},${end}p" "$FILE1" | cat -n

echo
echo "---- openshift/imagebuilder parser: search for instruction types that use this error ----"
rg -n "Must be of the form: name=value|can't find = in" "$FILE1"

echo
echo "---- buildkit frontend parser: lines around 'can't find = in' ----"
rg -n "can't find = in|must be of the form: name=value" "$FILE2"
line2=$(rg -n "can't find = in" "$FILE2" | head -n1 | cut -d: -f1)
start2=$((line2-40)); end2=$((line2+40))
sed -n "${start2},${end2}p" "$FILE2" | cat -n

Repository: openshift/machine-config-operator

Length of output: 5935


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE="vendor/github.com/openshift/imagebuilder/dockerfile/parser/line_parsers.go"

echo "---- locate parseArg / ARG parsing ----"
rg -n "parseArg\\b|ARG" "$FILE"

# show around parseArg if present
line=$(rg -n "func parseArg" "$FILE" | head -n1 | cut -d: -f1 || true)
if [[ -n "${line:-}" ]]; then
  start=$((line-60)); end=$((line+120))
  sed -n "${start},${end}p" "$FILE" | cat -n
fi

echo "---- also search entire vendor parser for the two error substrings ----"
rg -n "Syntax error - can't find = in|must be of the form: name=value" "vendor/github.com/openshift/imagebuilder/dockerfile/parser"

Repository: openshift/machine-config-operator

Length of output: 117


🏁 Script executed:

#!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"

echo "---- search for ARG parsing entrypoints / directive constants ----"
rg -n "ARG\b|commandArg|parseArg|parse.*Arg|parse.*ARG" "$PARSER_DIR"

echo "---- search for common ARG error messages ----"
rg -n "ARG.*(exactly|one|argument|must have|requires)|must be of the form|can't find = in" "$PARSER_DIR"

echo "---- search for ENV/LABEL parser usage (parseEnv/parseLabel) ----"
rg -n "parseEnv\\b|parseLabel\\b" "$PARSER_DIR"

echo "---- inspect dispatcher that maps instruction token to parseX ----"
# Look for the function that dispatches based on instruction name
rg -n "switch .*\\{|case .*ARG|case .*ENV|case .*LABEL" "$PARSER_DIR" | head -n 50

Repository: openshift/machine-config-operator

Length of output: 144


🏁 Script executed:

#!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"

echo "---- list vendored parser files ----"
find "$PARSER_DIR" -maxdepth 2 -type f | sort | sed -n '1,200p'

echo "---- search for ARG tokens anywhere in vendored parser (case-sensitive) ----"
rg -n "\\bARG\\b" "$PARSER_DIR" || true

echo "---- search for ENV/LABEL dispatch targets (case-sensitive) ----"
rg -n "\\bENV\\b|\\bLABEL\\b" "$PARSER_DIR" || true

echo "---- search for 'parse' functions that mention NameVal or key/value parsing ----"
rg -n "parseNameVal|parseEnv|parseLabel|name=value|can't find = in|must be of the form: name=value" "$PARSER_DIR" || true

echo "---- broad search for substring 'arg' (case-insensitive) to locate ARG parsing ----"
rg -n -i "arg" "$PARSER_DIR" | head -n 50 || true

echo "---- inspect any dispatcher/switch over instruction names ----"
rg -n "switch|case" "$PARSER_DIR" | head -n 80

Repository: openshift/machine-config-operator

Length of output: 5129


🏁 Script executed:

#!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"
FILE="$PARSER_DIR/parser.go"
LP="$PARSER_DIR/line_parsers.go"

echo "---- show dispatcher mapping for command.Arg/Env/Label ----"
rg -n "command\\.Arg|parseNameOrNameVal|command\\.Env|command\\.Label" "$FILE"
sed -n '210,270p' "$FILE" | cat -n

echo "---- find parseNameOrNameVal in line_parsers.go ----"
rg -n "func parseNameOrNameVal" "$LP"
line=$(rg -n "func parseNameOrNameVal" "$LP" | head -n1 | cut -d: -f1)
start=$((line-40)); end=$((line+140))
sed -n "${start},${end}p" "$LP" | cat -n

echo "---- search for specific error strings in line_parsers.go ----"
rg -n "can't find = in|must be of the form: name=value" "$LP"

echo "---- show nearby comment about allowed assignments (parseNameVal section) ----"
# show around line 230-270 where parseNameVal comment starts
sed -n '220,280p' "$LP" | cat -n

Repository: openshift/machine-config-operator

Length of output: 11638


Narrow the parser-error bypass in validateContainerfileSyntax

  • isKnownAdvancedSyntaxError() currently matches generic substrings ("can't find = in" / "must be of the form: name=value") and then returns nil, skipping all AST/instruction validation (pkg/controller/build/buildrequest/buildrequest.go:392-401).
  • In github.com/openshift/imagebuilder/dockerfile/parser, those exact errors are produced by parseNameVal() and are used for ENV and LABEL parsing (not ARG), so malformed ENV/LABEL can slip through the preflight check.
  • Replace this broad deny-list substring matching with an allow-list approach that tolerates only the specific advanced/heredoc constructs you intend to accept, while still failing invalid ENV/LABEL syntax.
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/controller/build/buildrequest/buildrequest.go` around lines 392 - 401,
The current bypass in validateContainerfileSyntax uses
isKnownAdvancedSyntaxError to match generic substrings and returns nil for any
match, which lets malformed ENV/LABEL pass; update isKnownAdvancedSyntaxError
(and its callers in validateContainerfileSyntax where parser.Parse error is
handled) to switch from deny-list substring matches to an explicit allow-list of
only the specific advanced constructs you intend to accept (e.g.,
heredoc/continued-line tokens or exact parser error codes/messages that
represent valid advanced syntax), and ensure errors from parseNameVal-related
messages (the ENV/LABEL "can't find = in" / "must be of the form: name=value")
are not allowed and still cause Parse to return an error so malformed ENV/LABEL
fail the preflight check.

Source: Coding guidelines

}

if result.AST == nil || len(result.AST.Children) == 0 {
return fmt.Errorf("parsed Containerfile has no instructions")
}

// Validate each instruction
for _, child := range result.AST.Children {
if err := br.validateInstruction(child); err != nil {
return err
}
}

return nil
}

// validateBasicSyntax performs basic sanity checks that don't require parsing.
func (br buildRequestImpl) validateBasicSyntax(containerfile string) error {
// Check for at least one FROM instruction using regex
if !basicSyntaxRegex.MatchString(containerfile) {
return fmt.Errorf("Containerfile must contain at least one FROM instruction")
}

return nil
}

// isKnownParserLimitation checks if a parser error is from known limitations of the
// imagebuilder parser library that buildah/podman handle correctly.
func isKnownParserLimitation(err error) bool {
if err == nil {
return false
}

errMsg := err.Error()
knownPatterns := []string{
"<<",
"<<-",
"heredoc",
"can't find = in",
"must be of the form: name=value",
}

for _, pattern := range knownPatterns {
if strings.Contains(errMsg, pattern) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thought (non-blocking): Typically, one shouldn't match errors in this way. However, given that the parser returns an error whose concrete type is general-purpose, this approach is fine here.

return true
}
}

return false
}

// validateInstruction validates that an instruction is valid and has required arguments.
func (br buildRequestImpl) validateInstruction(node *parser.Node) error {
if node == nil || node.Value == "" {
return nil
}

instruction := node.Value

// First check if this is a valid Dockerfile instruction
if _, exists := command.Commands[instruction]; !exists {
return fmt.Errorf("unknown Dockerfile instruction: %s", node.Value)
}

// NONE is valid without additional arguments
if instruction == command.Healthcheck && node.Next != nil && strings.EqualFold(node.Next.Value, "NONE") {
return nil
}

// Then check if it has required arguments
requirement, requiresArgs := instructionRequirements[instruction]
if requiresArgs {
// Check if the node has arguments
hasArgs := false
if node.Next != nil && node.Next.Value != "" {
hasArgs = true
}
// Also check if there's a raw attribute that contains args
if !hasArgs && node.Attributes != nil && len(node.Attributes) > 0 {
hasArgs = true
}

if !hasArgs {
return fmt.Errorf("%s instruction requires %s", strings.ToUpper(instruction), requirement)
}
}

return nil
}

// podToJob creates a Job with the spec of the given Pod
Expand Down
171 changes: 171 additions & 0 deletions pkg/controller/build/buildrequest/buildrequest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,174 @@ RUN rpm-ostree install && \
},
}
}

// TestValidateContainerfileSyntax tests the Containerfile validation logic
func TestValidateContainerfileSyntax(t *testing.T) {
t.Parallel()

br := &buildRequestImpl{}

testCases := []struct {
name string
containerfile string
expectError bool
errorContains string
}{
{
name: "Valid simple Containerfile",
containerfile: `FROM registry.example.com/base:latest
RUN echo "hello"
COPY . /app`,
expectError: false,
},
{
name: "Empty Containerfile",
containerfile: "",
expectError: true,
errorContains: "containerfile is empty",
},
{
name: "No FROM instruction",
containerfile: `RUN echo "hello"
COPY . /app`,
expectError: true,
errorContains: "must contain at least one FROM instruction",
},
{
name: "FROM without arguments",
containerfile: `FROM`,
expectError: true,
errorContains: "FROM instruction requires a base image",
},
{
name: "RUN without arguments",
containerfile: `FROM registry.example.com/base:latest
RUN`,
expectError: true,
errorContains: "RUN instruction requires a command",
},
{
name: "COPY without arguments",
containerfile: `FROM registry.example.com/base:latest
COPY`,
expectError: true,
errorContains: "COPY instruction requires source and destination paths",
},
{
name: "WORKDIR without arguments",
containerfile: `FROM registry.example.com/base:latest
WORKDIR`,
expectError: true,
errorContains: "WORKDIR instruction requires a directory path",
},
{
name: "Valid Containerfile with comments",
containerfile: `# Base image
FROM registry.example.com/base:latest
# Install dependencies
RUN yum install -y python3`,
expectError: false,
},
{
name: "Advanced syntax with heredoc - should pass",
containerfile: `FROM registry.example.com/base:latest
RUN bash <<'EOF'
set -xeuo pipefail
echo "hello"
EOF
`,
expectError: false, // Hybrid validator allows advanced syntax
},
}

for _, testCase := range testCases {
testCase := testCase
t.Run(testCase.name, func(t *testing.T) {
t.Parallel()

err := br.validateContainerfileSyntax(testCase.containerfile)

if testCase.expectError {
assert.Error(t, err)
if testCase.errorContains != "" {
assert.Contains(t, err.Error(), testCase.errorContains)
}
} else {
assert.NoError(t, err)
}
})
}
}

// TestValidateContainerfileInBuildRequest tests that validation happens during the build request flow
func TestValidateContainerfileInBuildRequest(t *testing.T) {
t.Parallel()

t.Run("Valid user Containerfile renders successfully", func(t *testing.T) {
opts := getBuildRequestOpts()
opts.MachineOSConfig.Spec.Containerfile[0].Content = `FROM configs AS final
RUN rpm-ostree install vim && ostree container commit`

br := newBuildRequest(opts)
configMaps, err := br.ConfigMaps()

assert.NoError(t, err)
assert.NotNil(t, configMaps)
// Verify the Containerfile ConfigMap was created
assert.Greater(t, len(configMaps), 0)
})
}

func TestValidateUnknownInstruction(t *testing.T) {
t.Parallel()

testCases := []struct {
name string
containerfile string
expectError bool
errorContains string
}{
{
name: "Unknown instruction BADINSTRUCTION",
containerfile: `FROM registry.example.com/base:latest
BADINSTRUCTION this should fail
RUN echo "test"`,
expectError: true,
errorContains: "unknown Dockerfile instruction",
},
{
name: "Unknown instruction INVALIDCMD",
containerfile: `FROM registry.example.com/base:latest
RUN echo "valid"
INVALIDCMD foo bar`,
expectError: true,
errorContains: "unknown Dockerfile instruction",
},
{
name: "Valid instructions only",
containerfile: `FROM registry.example.com/base:latest
RUN echo "test"
COPY . /app
ENV FOO=bar`,
expectError: false,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

br := &buildRequestImpl{}
err := br.validateContainerfileSyntax(tc.containerfile)

if tc.expectError {
assert.Error(t, err)
if tc.errorContains != "" {
assert.Contains(t, err.Error(), tc.errorContains)
}
} else {
assert.NoError(t, err)
}
})
}
}
15 changes: 0 additions & 15 deletions vendor/github.com/asaskevich/govalidator/.gitignore

This file was deleted.

12 changes: 0 additions & 12 deletions vendor/github.com/asaskevich/govalidator/.travis.yml

This file was deleted.

Loading