Skip to content

[Code scan] Chained $ref paths are resolved against the process cwd #121

Description

@njzjz

This issue was found by a Codex global code scan of the repository.

Affected code:

dargs/dargs/dargs.py

Lines 1113 to 1158 in b4db564

def _load_ref(ref_path: str) -> dict:
"""Load a dict from an external file referenced by ``$ref``.
Parameters
----------
ref_path : str
Path to the external file. Supported extensions: ``.json``, ``.yml``, ``.yaml``.
Returns
-------
dict
The loaded dict from the external file.
Raises
------
ValueError
If the file extension is not supported, or if the file does not contain a
top-level mapping/object.
ImportError
If pyyaml is not installed and a YAML file is requested.
"""
ext = os.path.splitext(ref_path)[1].lower()
if ext == ".json":
with open(ref_path, encoding="utf-8") as f:
loaded = json.load(f)
elif ext in (".yml", ".yaml"):
try:
import yaml
except ImportError as e:
raise ImportError(
"pyyaml is required to load YAML files referenced by $ref. "
"Install it with: pip install pyyaml"
) from e
with open(ref_path, encoding="utf-8") as f:
loaded = yaml.safe_load(f)
else:
raise ValueError(
f"Unsupported file extension `{ext}` for $ref. "
"Supported extensions are: .json, .yml, .yaml"
)
if not isinstance(loaded, dict):
raise ValueError(
f"Referenced file {ref_path!r} must contain a mapping/object at the top "
f"level, but got {type(loaded).__name__!r}."
)
return loaded

dargs/dargs/dargs.py

Lines 1161 to 1204 in b4db564

def _resolve_ref(d: dict, allow_ref: bool = False) -> None:
"""Resolve the ``$ref`` key in a dict by loading from an external file.
If ``$ref`` is present in ``d``, its value is treated as a file path.
The file is loaded and its contents are merged into ``d``. Keys already
present in ``d`` (other than ``$ref``) take precedence over keys from the
loaded file, allowing local overrides. Chained ``$ref`` values in the
loaded content are resolved in turn. Cyclic references are detected and
raise a ``ValueError``.
The dict is modified **in place**.
Parameters
----------
d : dict
The dict that may contain a ``$ref`` key.
allow_ref : bool, optional
If False (the default), raise a ``ValueError`` when ``$ref`` is found.
Set to True to enable loading from external files.
Raises
------
ValueError
If ``$ref`` is found but ``allow_ref`` is False, or if a cyclic
reference is detected.
"""
if "$ref" not in d:
return
if not allow_ref:
raise ValueError(
"$ref is not allowed by default. "
"Pass allow_ref=True to enable loading from external files."
)
visited_refs: set[str] = set()
while "$ref" in d:
ref_path = d.pop("$ref")
if ref_path in visited_refs:
raise ValueError(f"Cyclic $ref detected for path: {ref_path!r}")
visited_refs.add(ref_path)
loaded = _load_ref(ref_path)
# Merge: loaded content as base, local keys take precedence
merged = {**loaded, **d}
d.clear()
d.update(merged)

dargs/tests/test_ref.py

Lines 213 to 227 in b4db564

def test_ref_chained(self) -> None:
"""A $ref that loads a file containing another $ref is fully resolved."""
inner_path = self._write_json("ref_inner.json", {"sub1": 7, "sub2": "inner"})
outer_path = self._write_json("ref_outer.json", {"$ref": inner_path})
ca = Argument(
"base",
dict,
[
Argument("sub1", int),
Argument("sub2", str),
],
)
result = ca.normalize({"base": {"$ref": outer_path}}, allow_ref=True)
self.assertEqual(result["base"]["sub1"], 7)
self.assertEqual(result["base"]["sub2"], "inner")

Problem:
_resolve_ref() loads every $ref path directly through _load_ref(ref_path). If a referenced file contains another relative $ref, the second path is resolved against the current working directory, not the directory of the file that declared it.

Reproducer:

import json, os, tempfile
from dargs import Argument

with tempfile.TemporaryDirectory() as td:
    inner = os.path.join(td, "inner.json")
    outer = os.path.join(td, "outer.json")
    with open(inner, "w") as f:
        json.dump({"sub1": 7}, f)
    with open(outer, "w") as f:
        json.dump({"$ref": "inner.json"}, f)

    Argument("base", dict, [Argument("sub1", int)]).normalize(
        {"base": {"$ref": outer}}, allow_ref=True
    )

Observed behavior:

FileNotFoundError: [Errno 2] No such file or directory: 'inner.json'

Expected behavior:
Nested relative $ref paths should resolve relative to the containing referenced file, so reusable config fragments can live together in the same directory.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    Status
    Todo

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions