-
Notifications
You must be signed in to change notification settings - Fork 502
MCO-1333: Validate OCL Containerfiles #6187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,11 +5,14 @@ import ( | |
| _ "embed" | ||
| "encoding/json" | ||
| "fmt" | ||
| "regexp" | ||
| "strings" | ||
| "text/template" | ||
|
|
||
| mcfgv1 "github.com/openshift/api/machineconfiguration/v1" | ||
| mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned" | ||
| command "github.com/openshift/imagebuilder/dockerfile/command" | ||
| parser "github.com/openshift/imagebuilder/dockerfile/parser" | ||
| "github.com/openshift/machine-config-operator/pkg/controller/build/constants" | ||
| "github.com/openshift/machine-config-operator/pkg/controller/build/utils" | ||
| chelpers "github.com/openshift/machine-config-operator/pkg/controller/common" | ||
|
|
@@ -39,6 +42,29 @@ const ( | |
| machineConfigJSONFilename string = "machineconfig.json.gz" | ||
| ) | ||
|
|
||
| var basicSyntaxRegex = regexp.MustCompile(`(?m)(?i)^\s*FROM`) | ||
|
|
||
| // instructionRequirements maps Containerfile instructions that MUST have arguments to their requirement descriptions | ||
| // Instructions not in this map either don't require arguments or have optional arguments | ||
| var instructionRequirements = map[string]string{ | ||
| command.Add: "source and destination paths", | ||
| command.Arg: "a variable name", | ||
| command.Cmd: "a command or arguments", | ||
| command.Copy: "source and destination paths", | ||
| command.Entrypoint: "a command or arguments", | ||
| command.Env: "key=value pairs", | ||
| command.Expose: "a port", | ||
| command.From: "a base image", | ||
| command.Label: "key=value pairs", | ||
| command.Onbuild: "an instruction to execute", | ||
| command.Run: "a command", | ||
| command.Shell: "a shell command array", | ||
| command.StopSignal: "a signal", | ||
| command.User: "a username or UID", | ||
| command.Volume: "a mount point", | ||
| command.Workdir: "a directory path", | ||
| } | ||
|
|
||
| // Represents the request to build a layered OS image. | ||
| type buildRequestImpl struct { | ||
| opts BuildRequestOpts | ||
|
|
@@ -69,6 +95,13 @@ func newBuildRequest(opts BuildRequestOpts) BuildRequest { | |
| } | ||
| } | ||
|
|
||
| // Validate user's Containerfile if provided | ||
| if br.userContainerfile != "" { | ||
| if err := br.validateContainerfileSyntax(br.userContainerfile); err != nil { | ||
| klog.Warningf("User Containerfile validation failed") | ||
| } | ||
| } | ||
|
|
||
| return br | ||
| } | ||
|
|
||
|
|
@@ -346,7 +379,124 @@ func (br buildRequestImpl) renderContainerfile() (string, error) { | |
| return "", fmt.Errorf("could not execute containerfile template: %w", err) | ||
| } | ||
|
|
||
| return out.String(), nil | ||
| renderedContainerfile := out.String() | ||
|
|
||
| // Validate the rendered Containerfile | ||
| if err := br.validateContainerfileSyntax(renderedContainerfile); err != nil { | ||
| return "", fmt.Errorf("rendered containerfile validation failed: %w", err) | ||
| } | ||
|
|
||
| return renderedContainerfile, nil | ||
| } | ||
|
|
||
| // validateContainerfileSyntax validates the syntax of a Containerfile using basic checks and the imagebuilder parser. | ||
| func (br buildRequestImpl) validateContainerfileSyntax(containerfile string) error { | ||
| if containerfile == "" { | ||
| return fmt.Errorf("containerfile is empty") | ||
| } | ||
|
|
||
| if err := br.validateBasicSyntax(containerfile); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| result, err := parser.Parse(strings.NewReader(containerfile)) | ||
| if err != nil { | ||
| // For rendered templates, advanced syntax is expected | ||
| // Only fail on obvious syntax errors | ||
| if !isKnownParserLimitation(err) { | ||
| return fmt.Errorf("failed to parse Containerfile: %w", err) | ||
| } | ||
| // Log but don't fail for advanced syntax | ||
| klog.V(4).Infof("Containerfile uses syntax the parser doesn't support, skipping detailed parser validation: %v", err) | ||
| return nil | ||
|
Comment on lines
+402
to
+411
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🌐 Web query:
💡 Result: In the Dockerfile parser used by openshift/imagebuilder (which relies on the standard Dockerfile syntax parser), the errors "can't find = in [X]" and "must be of the form: name=value" occur when the parser encounters a line (typically ENV or LABEL) that it expects to follow the key=value format but fails to find the required equals sign within the expected token [1][2][3]. This error is almost always caused by malformed syntax in ENV or LABEL instructions, rather than ARG [1][4]. Common causes include: 1. Unexpected spaces: If you write ENV KEY = VALUE (with a space before the =), the parser treats the = as a separate token and fails to associate it correctly, often reporting the failure on the next word [2][5]. 2. Trailing characters or comments: If you add a comment at the end of an ENV or LABEL line using #, the parser may attempt to treat that comment as part of the instruction. If it encounters a word in the comment that lacks an =, it will fail [1][6]. 3. Unquoted values with special characters: If an environment variable value contains spaces or special characters that are not properly quoted, the parser may break the line into multiple tokens and attempt to interpret them as key=value pairs, eventually failing when it finds a token without an = [7][3][8]. 4. Line continuation issues: If a backslash () is used incorrectly, or if there is trailing whitespace after the backslash, it can disrupt the parser's view of the next line, leading to parsing errors [1][5]. Regarding malformed ARG/ENV instructions: The error message is characteristic of ENV and LABEL instructions, which strictly enforce the key=value syntax (or the older space-separated syntax, which is often source of confusion when mixed) [9][10][1]. ARG instructions behave differently; while they also support key=value, they are generally more lenient or throw different errors (e.g., "ARG requires exactly one argument") when formatted incorrectly [4]. Therefore, malformed ARG instructions do not typically emit these specific "can't find =" messages [4]. Citations:
🏁 Script executed: #!/bin/bash
set -euo pipefail
FILE="pkg/controller/build/buildrequest/buildrequest.go"
# Show the exact helper + its callsites around the reported lines
sed -n '350,430p' "$FILE" | cat -n
echo "---- search for isKnownAdvancedSyntaxError definition ----"
rg -n "func isKnownAdvancedSyntaxError|isKnownAdvancedSyntaxError\(" "$FILE"
echo "---- show surrounding definition if found ----"
# Print a bit more around the function definition
# (rg line number output used by awk to slice)
line_def=$(rg -n "func isKnownAdvancedSyntaxError" "$FILE" | head -n1 | cut -d: -f1 || true)
if [[ -n "${line_def:-}" ]]; then
start=$((line_def-40))
end=$((line_def+80))
sed -n "${start},${end}p" "$FILE" | cat -n
fiRepository: openshift/machine-config-operator Length of output: 7899 🏁 Script executed: #!/bin/bash
set -euo pipefail
FILE="pkg/controller/build/buildrequest/buildrequest.go"
echo "---- locate instructionRequirements ----"
rg -n "instructionRequirements" "$FILE"
echo "---- show around instructionRequirements ----"
line=$(rg -n "instructionRequirements" "$FILE" | head -n1 | cut -d: -f1)
start=$((line-30)); end=$((line+120))
sed -n "${start},${end}p" "$FILE" | cat -n
echo "---- search for validateInstruction usage / other syntax validation ----"
rg -n "validateContainerfileSyntax|validateBasicSyntax|validateInstruction|validInstructions" "$FILE"Repository: openshift/machine-config-operator Length of output: 7771 🏁 Script executed: #!/bin/bash
set -euo pipefail
FILE="pkg/controller/build/buildrequest/buildrequest.go"
echo "---- inspect validateInstruction fully (incl. instructionRequirements usage) ----"
sed -n '430,560p' "$FILE" | cat -n
echo "---- locate containerfileToConfigMap + renderedContainerfile flow ----"
rg -n "containerfileToConfigMap|renderContainerfile|validateContainerfileSyntax" "$FILE"
echo "---- show around containerfileToConfigMap ----"
line=$(rg -n "func \\(br buildRequestImpl\\) containerfileToConfigMap" "$FILE" | head -n1 | cut -d: -f1)
start=$((line-60)); end=$((line+220))
sed -n "${start},${end}p" "$FILE" | cat -n
echo "---- search for other isKnownAdvancedSyntaxError references ----"
rg -n "isKnownAdvancedSyntaxError" "$FILE"
echo "---- search repo for these exact substrings (any other suppression) ----"
rg -n "can't find = in|must be of the form: name=value" . || true
echo "---- inspect embedded Containerfile templates for heredoc/ENV/LABEL patterns ----"
ls -la pkg/controller/build/buildrequest/assets || true
for f in pkg/controller/build/buildrequest/assets/*; do
echo "### $f"; sed -n '1,120p' "$f" | cat -n; echo; doneRepository: openshift/machine-config-operator Length of output: 31915 🏁 Script executed: #!/bin/bash
set -euo pipefail
# Inspect the exact parser code that emits the two error strings.
FILE1="vendor/github.com/openshift/imagebuilder/dockerfile/parser/line_parsers.go"
FILE2="vendor/github.com/moby/buildkit/frontend/dockerfile/parser/line_parsers.go"
echo "---- openshift/imagebuilder parser: lines around 'can't find = in' ----"
rg -n "can't find = in|must be of the form: name=value" "$FILE1"
line1=$(rg -n "can't find = in" "$FILE1" | head -n1 | cut -d: -f1)
start=$((line1-40)); end=$((line1+40))
sed -n "${start},${end}p" "$FILE1" | cat -n
echo
echo "---- openshift/imagebuilder parser: search for instruction types that use this error ----"
rg -n "Must be of the form: name=value|can't find = in" "$FILE1"
echo
echo "---- buildkit frontend parser: lines around 'can't find = in' ----"
rg -n "can't find = in|must be of the form: name=value" "$FILE2"
line2=$(rg -n "can't find = in" "$FILE2" | head -n1 | cut -d: -f1)
start2=$((line2-40)); end2=$((line2+40))
sed -n "${start2},${end2}p" "$FILE2" | cat -nRepository: openshift/machine-config-operator Length of output: 5935 🏁 Script executed: #!/bin/bash
set -euo pipefail
FILE="vendor/github.com/openshift/imagebuilder/dockerfile/parser/line_parsers.go"
echo "---- locate parseArg / ARG parsing ----"
rg -n "parseArg\\b|ARG" "$FILE"
# show around parseArg if present
line=$(rg -n "func parseArg" "$FILE" | head -n1 | cut -d: -f1 || true)
if [[ -n "${line:-}" ]]; then
start=$((line-60)); end=$((line+120))
sed -n "${start},${end}p" "$FILE" | cat -n
fi
echo "---- also search entire vendor parser for the two error substrings ----"
rg -n "Syntax error - can't find = in|must be of the form: name=value" "vendor/github.com/openshift/imagebuilder/dockerfile/parser"Repository: openshift/machine-config-operator Length of output: 117 🏁 Script executed: #!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"
echo "---- search for ARG parsing entrypoints / directive constants ----"
rg -n "ARG\b|commandArg|parseArg|parse.*Arg|parse.*ARG" "$PARSER_DIR"
echo "---- search for common ARG error messages ----"
rg -n "ARG.*(exactly|one|argument|must have|requires)|must be of the form|can't find = in" "$PARSER_DIR"
echo "---- search for ENV/LABEL parser usage (parseEnv/parseLabel) ----"
rg -n "parseEnv\\b|parseLabel\\b" "$PARSER_DIR"
echo "---- inspect dispatcher that maps instruction token to parseX ----"
# Look for the function that dispatches based on instruction name
rg -n "switch .*\\{|case .*ARG|case .*ENV|case .*LABEL" "$PARSER_DIR" | head -n 50Repository: openshift/machine-config-operator Length of output: 144 🏁 Script executed: #!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"
echo "---- list vendored parser files ----"
find "$PARSER_DIR" -maxdepth 2 -type f | sort | sed -n '1,200p'
echo "---- search for ARG tokens anywhere in vendored parser (case-sensitive) ----"
rg -n "\\bARG\\b" "$PARSER_DIR" || true
echo "---- search for ENV/LABEL dispatch targets (case-sensitive) ----"
rg -n "\\bENV\\b|\\bLABEL\\b" "$PARSER_DIR" || true
echo "---- search for 'parse' functions that mention NameVal or key/value parsing ----"
rg -n "parseNameVal|parseEnv|parseLabel|name=value|can't find = in|must be of the form: name=value" "$PARSER_DIR" || true
echo "---- broad search for substring 'arg' (case-insensitive) to locate ARG parsing ----"
rg -n -i "arg" "$PARSER_DIR" | head -n 50 || true
echo "---- inspect any dispatcher/switch over instruction names ----"
rg -n "switch|case" "$PARSER_DIR" | head -n 80Repository: openshift/machine-config-operator Length of output: 5129 🏁 Script executed: #!/bin/bash
set -euo pipefail
PARSER_DIR="vendor/github.com/openshift/imagebuilder/dockerfile/parser"
FILE="$PARSER_DIR/parser.go"
LP="$PARSER_DIR/line_parsers.go"
echo "---- show dispatcher mapping for command.Arg/Env/Label ----"
rg -n "command\\.Arg|parseNameOrNameVal|command\\.Env|command\\.Label" "$FILE"
sed -n '210,270p' "$FILE" | cat -n
echo "---- find parseNameOrNameVal in line_parsers.go ----"
rg -n "func parseNameOrNameVal" "$LP"
line=$(rg -n "func parseNameOrNameVal" "$LP" | head -n1 | cut -d: -f1)
start=$((line-40)); end=$((line+140))
sed -n "${start},${end}p" "$LP" | cat -n
echo "---- search for specific error strings in line_parsers.go ----"
rg -n "can't find = in|must be of the form: name=value" "$LP"
echo "---- show nearby comment about allowed assignments (parseNameVal section) ----"
# show around line 230-270 where parseNameVal comment starts
sed -n '220,280p' "$LP" | cat -nRepository: openshift/machine-config-operator Length of output: 11638 Narrow the parser-error bypass in
🤖 Prompt for AI AgentsSource: Coding guidelines |
||
| } | ||
|
|
||
| if result.AST == nil || len(result.AST.Children) == 0 { | ||
| return fmt.Errorf("parsed Containerfile has no instructions") | ||
| } | ||
|
|
||
| // Validate each instruction | ||
| for _, child := range result.AST.Children { | ||
| if err := br.validateInstruction(child); err != nil { | ||
| return err | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // validateBasicSyntax performs basic sanity checks that don't require parsing. | ||
| func (br buildRequestImpl) validateBasicSyntax(containerfile string) error { | ||
| // Check for at least one FROM instruction using regex | ||
| if !basicSyntaxRegex.MatchString(containerfile) { | ||
| return fmt.Errorf("Containerfile must contain at least one FROM instruction") | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // isKnownParserLimitation checks if a parser error is from known limitations of the | ||
| // imagebuilder parser library that buildah/podman handle correctly. | ||
| func isKnownParserLimitation(err error) bool { | ||
| if err == nil { | ||
| return false | ||
| } | ||
|
|
||
| errMsg := err.Error() | ||
| knownPatterns := []string{ | ||
| "<<", | ||
| "<<-", | ||
| "heredoc", | ||
| "can't find = in", | ||
| "must be of the form: name=value", | ||
| } | ||
|
|
||
| for _, pattern := range knownPatterns { | ||
| if strings.Contains(errMsg, pattern) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thought (non-blocking): Typically, one shouldn't match errors in this way. However, given that the parser returns an error whose concrete type is general-purpose, this approach is fine here. |
||
| return true | ||
| } | ||
| } | ||
|
|
||
| return false | ||
| } | ||
|
|
||
| // validateInstruction validates that an instruction is valid and has required arguments. | ||
| func (br buildRequestImpl) validateInstruction(node *parser.Node) error { | ||
| if node == nil || node.Value == "" { | ||
| return nil | ||
| } | ||
|
|
||
| instruction := node.Value | ||
|
|
||
| // First check if this is a valid Dockerfile instruction | ||
| if _, exists := command.Commands[instruction]; !exists { | ||
| return fmt.Errorf("unknown Dockerfile instruction: %s", node.Value) | ||
| } | ||
|
|
||
| // NONE is valid without additional arguments | ||
| if instruction == command.Healthcheck && node.Next != nil && strings.EqualFold(node.Next.Value, "NONE") { | ||
| return nil | ||
| } | ||
|
|
||
| // Then check if it has required arguments | ||
| requirement, requiresArgs := instructionRequirements[instruction] | ||
| if requiresArgs { | ||
| // Check if the node has arguments | ||
| hasArgs := false | ||
| if node.Next != nil && node.Next.Value != "" { | ||
| hasArgs = true | ||
| } | ||
| // Also check if there's a raw attribute that contains args | ||
| if !hasArgs && node.Attributes != nil && len(node.Attributes) > 0 { | ||
| hasArgs = true | ||
| } | ||
|
|
||
| if !hasArgs { | ||
| return fmt.Errorf("%s instruction requires %s", strings.ToUpper(instruction), requirement) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| // podToJob creates a Job with the spec of the given Pod | ||
|
|
||
This file was deleted.
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.