Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ This downloads the latest shared `liblbug` binary (via upstream
The Python package is installed directly from `src_py/`, so the standalone
workflow no longer depends on `./build/ladybug`.

Run tests with:
Before running the tests, fetch the test data by initializing the git submodules:

```bash
git submodule update --init --recursive
```

Then run tests with:

```bash
uv run pytest
Expand Down
2 changes: 2 additions & 0 deletions src_cpp/py_query_result_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void NPArrayWrapper::appendElement(Value* value) {
((int64_t*)dataBuffer)[numElements] =
Interval::getNanoseconds(value->getValue<interval_t>());
} break;
case LogicalTypeID::JSON:
case LogicalTypeID::STRING: {
auto val = value->getValue<std::string>();
py::str result(val);
Expand Down Expand Up @@ -167,6 +168,7 @@ py::dtype NPArrayWrapper::convertToArrayType(const LogicalType& type) {
case LogicalTypeID::INTERVAL: {
dtype = "timedelta64[ns]";
} break;
case LogicalTypeID::JSON:
case LogicalTypeID::DECIMAL:
case LogicalTypeID::UNION:
case LogicalTypeID::BLOB:
Expand Down
84 changes: 84 additions & 0 deletions test/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,87 @@ def test_to_json_python_param_with_homogeneous_list_uses_typed_binding(

assert normalized_query == query
assert normalized_parameters == parameters


def test_get_as_df_json_scalar(conn_db_empty: ConnDB) -> None:
    """
    Check that scalar JSON values come back from get_as_df() as Python strings.

    One UNWIND query yields a JSON object, a typed JSON NULL and a JSON array,
    so a single column exercises non-null and null values side by side.
    """
    connection, _db = conn_db_empty
    query = (
        "UNWIND ["
        'CAST(\'{"a": 1, "b": [2, 3]}\' AS JSON), '
        "CAST(NULL AS JSON), "
        "CAST('[1, 2, 3]' AS JSON)"
        "] AS j RETURN j"
    )
    column = connection.execute(query).get_as_df()["j"]

    assert str(column.dtype) == "object"
    assert column.isna().tolist() == [False, True, False]

    # Rows 0 and 2 carry the non-null values; each must be a str that parses
    # back to the JSON document it was cast from.
    for position, expected in ((0, {"a": 1, "b": [2, 3]}), (2, [1, 2, 3])):
        cell = column.iloc[position]
        assert isinstance(cell, str)
        assert json.loads(cell) == expected


def test_get_as_df_json_empty_result(conn_db_empty: ConnDB) -> None:
    """
    Check that a zero-row result over a JSON column still converts cleanly.

    Regression note carried over from the original test: convertToArrayType()
    is invoked while the NPArrayWrapper is being constructed, before any row
    is visited, which makes an empty JSON result the smallest reproduction of
    the earlier dtype-selection crash.
    """
    connection, _db = conn_db_empty
    connection.execute("CREATE NODE TABLE t (id SERIAL PRIMARY KEY, data JSON)")

    # The table was just created and never populated, so the match is empty.
    result = connection.execute("MATCH (n:t) RETURN n.data AS data")
    frame = result.get_as_df()

    assert len(frame) == 0
    assert str(frame["data"].dtype) == "object"


def test_get_as_df_json_extract(conn_db_empty: ConnDB) -> None:
    """Check that a json_extract() result converts through get_as_df() as a string."""
    connection, _db = conn_db_empty
    connection.execute("INSTALL json; LOAD json;")
    connection.execute("CREATE NODE TABLE t (id SERIAL PRIMARY KEY, data JSON)")

    # The nested object stored under "name" is exactly what '$.name' should
    # hand back, so it doubles as the expected value below.
    full_name = {"first": "Alice", "last": "Smith"}
    payload = json.dumps({"name": full_name})
    connection.execute(
        "CREATE (n:t {data: to_json($data)})",
        parameters={"data": payload},
    )

    query = "MATCH (n:t) RETURN json_extract(n.data, '$.name') AS name"
    names = connection.execute(query).get_as_df()["name"]

    assert str(names.dtype) == "object"
    extracted = names.iloc[0]
    assert isinstance(extracted, str)
    assert json.loads(extracted) == full_name


def test_get_as_df_json_list(conn_db_empty: ConnDB) -> None:
    """Check that a JSON[] column comes back from get_as_df() as lists of strings."""
    connection, _db = conn_db_empty
    connection.execute("INSTALL json; LOAD json;")
    connection.execute("CREATE NODE TABLE t (id SERIAL PRIMARY KEY, data JSON[])")

    # Three single-key objects, bound directly as the list parameter.
    items = [{"x": value} for value in (1, 2, 3)]
    connection.execute("CREATE (n:t {data: $d})", parameters={"d": items})

    result = connection.execute("MATCH (n:t) RETURN n.data AS data")
    column = result.get_as_df()["data"]

    assert str(column.dtype) == "object"
    cell = column.iloc[0]
    assert isinstance(cell, list)
    assert len(cell) == 3
    assert all(isinstance(element, str) for element in cell)