From e3142ce82082f9a0e773629c6980e1ce1763629a Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 28 Mar 2025 11:22:39 +0100 Subject: [PATCH 01/26] Fix inconsistencies in inputs --- assets/inputs/compact.corn | 2 +- assets/inputs/complex_keys.corn | 2 +- assets/inputs/very_compact.corn | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/assets/inputs/compact.corn b/assets/inputs/compact.corn index 49185de..0e78286 100644 --- a/assets/inputs/compact.corn +++ b/assets/inputs/compact.corn @@ -8,7 +8,7 @@ seven={foo=[] bar=[]} eight=["foo""bar"] - nine=[truefalse] + nine=[true false] ten=[1 2] eleven=[[][]] twelve=[{}{}] diff --git a/assets/inputs/complex_keys.corn b/assets/inputs/complex_keys.corn index cb74912..fca06cc 100644 --- a/assets/inputs/complex_keys.corn +++ b/assets/inputs/complex_keys.corn @@ -2,7 +2,7 @@ with_underscore = 0 with-dash = 1 with_🌽 = 2 - !"£$%^&*()_ = 3 + !£$%^&*()_ = 3 j12345 = 4 foo.bar-baz = "hello" apple-pie.crust = "yum" diff --git a/assets/inputs/very_compact.corn b/assets/inputs/very_compact.corn index 3932084..4af65a8 100644 --- a/assets/inputs/very_compact.corn +++ b/assets/inputs/very_compact.corn @@ -1 +1 @@ -{one={foo="bar" bar="foo"} two={foo=1 bar=2} three={foo=1.0 bar=2.0} four={foo=true bar=false} five={foo=null bar=null} six={foo={} bar={}} seven={foo=[] bar=[]} eight=["foo""bar"] nine=[truefalse] ten=[1 2] eleven=[[][]] twelve=[{}{}]} +{one={foo="bar" bar="foo"} two={foo=1 bar=2} three={foo=1.0 bar=2.0} four={foo=true bar=false} five={foo=null bar=null} six={foo={} bar={}} seven={foo=[] bar=[]} eight=["foo""bar"] nine=[true false] ten=[1 2] eleven=[[][]] twelve=[{}{}]} From a9b79d48444c98081dc72f0ee8a1c2a5875e5a38 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 28 Mar 2025 11:23:34 +0100 Subject: [PATCH 02/26] Create an initial skeleton for the new parser --- Cargo.lock | 317 ++++-------------------- Cargo.toml | 44 +--- cli/src/error.rs | 12 +- cli/src/main.rs | 4 +- src/de.rs | 608 +++++++++++++++++------------------------------ src/error.rs | 44 ++-- src/grammar.pest | 93 -------- src/lib.rs | 67 +----- src/parser.rs | 425 --------------------------------- src/value.rs | 34 +++ 10 files changed, 347 insertions(+), 1301 deletions(-) delete mode 100644 src/grammar.pest delete mode 100644 src/parser.rs create mode 100644 src/value.rs diff --git a/Cargo.lock b/Cargo.lock index e2a785d..40e37d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" + [[package]] name = "autocfg" version = "1.4.0" @@ -79,15 +85,6 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "bstr" version = "1.11.3" @@ -112,19 +109,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.16" +version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" +checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "shlex", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -160,9 +151,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.31" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767" +checksum = "e958897981290da2a852763fe9cdb89cd36977a5d729023127095fa94d95e2ff" dependencies = [ "clap_builder", "clap_derive", @@ -170,9 +161,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.31" +version = "4.5.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863" +checksum = "83b0f35019843db2160b5bb19ae09b4e6411ac33fc6a712003c33e03090e2489" dependencies = [ "anstream", "anstyle", @@ -182,9 +173,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.28" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck", "proc-macro2", @@ -213,16 +204,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "console_error_panic_hook" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" -dependencies = [ - "cfg-if 1.0.0", - "wasm-bindgen", -] - [[package]] name = "corn-cli" version = "0.10.0" @@ -236,15 +217,6 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "criterion" version = "0.5.1" @@ -312,31 +284,11 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - [[package]] name = "either" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" @@ -354,23 +306,13 @@ dependencies = [ "typeid", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "half" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "7db2ff139bba50379da6aa0766b52fdcb62cb5b263009b09ed58ba604e14bbd1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crunchy", ] @@ -388,15 +330,15 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" [[package]] name = "indexmap" -version = "2.7.1" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" dependencies = [ "equivalent", "hashbrown", @@ -405,9 +347,9 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.15" +version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37" +checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", @@ -456,32 +398,24 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.170" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libcorn" version = "0.10.0" dependencies = [ - "cfg-if 1.0.0", - "console_error_panic_hook", + "anyhow", "criterion", "indexmap", "mlua", - "paste", - "pest", - "pest_derive", "serde", - "serde-wasm-bindgen", "serde_bytes", "serde_json", "serde_norway", - "thiserror", "toml_edit", - "wasm-bindgen", "wasm-bindgen-test", - "wee_alloc", ] [[package]] @@ -496,9 +430,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.26" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" @@ -506,12 +440,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "memory_units" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8452105ba047068f40ff7093dd1d9da90898e63dd61736462e9cdda6a90ad3c3" - [[package]] name = "minicov" version = "0.3.7" @@ -547,7 +475,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1901c1a635a22fe9250ffcc4fcc937c16b47c2e9e71adba8784af8bca1f69594" dependencies = [ "cc", - "cfg-if 1.0.0", + "cfg-if", "pkg-config", ] @@ -577,15 +505,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "c2806eaa3524762875e21c3dcd057bc4b7bfa01ce4da8d46be1cd43649e1cc6b" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "ordered-float" @@ -612,64 +540,13 @@ version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall", "smallvec", "windows-targets", ] -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - -[[package]] -name = "pest" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" -dependencies = [ - "memchr", - "thiserror", - "ucd-trie", -] - -[[package]] -name = "pest_derive" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" -dependencies = [ - "pest", - "pest_generator", -] - -[[package]] -name = "pest_generator" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" -dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "pest_meta" -version = "2.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" -dependencies = [ - "once_cell", - "pest", - "sha2", -] - [[package]] name = "pkg-config" version = "0.3.32" @@ -737,9 +614,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -837,9 +714,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -854,31 +731,20 @@ dependencies = [ "serde", ] -[[package]] -name = "serde-wasm-bindgen" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" -dependencies = [ - "js-sys", - "serde", - "wasm-bindgen", -] - [[package]] name = "serde_bytes" -version = "0.11.16" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "364fec0df39c49a083c9a8a18a23a6bcfd9af130fe9fe321d18520a0d113e09e" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ "serde", ] [[package]] name = "serde_derive" -version = "1.0.218" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -919,17 +785,6 @@ dependencies = [ "serde", ] -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if 1.0.0", - "cpufeatures", - "digest", -] - [[package]] name = "shlex" version = "1.3.0" @@ -950,35 +805,15 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.99" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e925281e18ffd9d640e234264753c43edc62d64b2d4cf898f1bc5e75f3fc2" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "thiserror" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tinytemplate" version = "1.2.1" @@ -1017,18 +852,6 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unicode-ident" version = "1.0.18" @@ -1047,12 +870,6 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "walkdir" version = "2.5.0" @@ -1069,7 +886,7 @@ version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", @@ -1095,7 +912,7 @@ version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "once_cell", "wasm-bindgen", @@ -1168,34 +985,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "wee_alloc" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb3b5a6b2bb17cb6ad44a2e68a43e8d2722c997da10e928665c72ec6c0a0b8e" -dependencies = [ - "cfg-if 0.1.10", - "libc", - "memory_units", - "winapi", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - [[package]] name = "winapi-util" version = "0.1.9" @@ -1205,12 +994,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.59.0" @@ -1286,9 +1069,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" +checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" dependencies = [ "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index 2725ac9..b5ede3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,22 +9,14 @@ license = "MIT" description = "Parsing engine for Corn, a simple and pain-free configuration language." repository = "https://github.com/JakeStanger/corn" categories = ["config", "parsing"] -keywords = ["configuration", "language", "wasm", "pest", "peg"] +keywords = ["configuration", "language", "wasm", "peg"] authors = ["Jake Stanger "] homepage = "https://cornlang.dev/" documentation = "https://docs.rs/libcorn" readme = "README.md" [features] -# WASM support -wasm = [ - "dep:wasm-bindgen", - "dep:serde-wasm-bindgen", - "dep:console_error_panic_hook", - "dep:wee_alloc", -] - -# Lua version support +# Lua support lua51 = ["dep:mlua", "mlua/lua51"] lua52 = ["dep:mlua", "mlua/lua52"] lua53 = ["dep:mlua", "mlua/lua53"] @@ -32,31 +24,19 @@ lua54 = ["dep:mlua", "mlua/lua54"] luajit = ["dep:mlua", "mlua/luajit"] luajit52 = ["dep:mlua", "mlua/luajit52"] -# Internal features -bench = ["dep:criterion"] - [lib] name = "corn" crate-type = ["cdylib", "rlib"] [dependencies] # Core -pest = "2.7.15" -pest_derive = "2.7.15" -serde = { version = "1.0.218", features = ["derive"] } -indexmap = { version = "2.7.1", features = ["serde"] } - -# Error handling -thiserror = "2.0.12" - -# Utilities -cfg-if = "1.0.0" +serde = "1.0.219" +indexmap = { version = "2.8.0", features = ["serde"] } -# WASM support (optional) -wasm-bindgen = { version = "0.2.100", optional = true } -serde-wasm-bindgen = { version = "0.6.5", optional = true } -console_error_panic_hook = { version = "0.1.7", optional = true } -wee_alloc = { version = "0.4.5", optional = true } +# # WASM support (optional) +# wasm-bindgen = { version = "0.2.100", optional = true } +# serde-wasm-bindgen = { version = "0.6.5", optional = true } +# console_error_panic_hook = { version = "0.1.7", optional = true } # Lua support (optional) mlua = { version = "0.10.3", features = [ @@ -65,16 +45,15 @@ mlua = { version = "0.10.3", features = [ "serialize", ], optional = true } -# Benchmarking (optional) -criterion = { version = "0.5.1", features = ["html_reports"], optional = true } [dev-dependencies] -paste = "1.0.15" wasm-bindgen-test = { version = "0.3.50" } serde_json = "1.0.140" serde_norway = "0.9.42" -serde_bytes = "0.11.16" +serde_bytes = "0.11.17" toml_edit = { version = "0.22.24", features = ["serde"] } +anyhow = "1.0.97" +criterion = { version = "0.5.1", features = ["html_reports"] } [profile.release] lto = true @@ -84,4 +63,3 @@ strip = true [[bench]] name = "serde" harness = false -required-features = ["bench"] diff --git a/cli/src/error.rs b/cli/src/error.rs index 58f213b..4819aa9 100644 --- a/cli/src/error.rs +++ b/cli/src/error.rs @@ -1,5 +1,5 @@ use colored::Colorize; -use corn::error::Error as CornError; +use corn::Error as CornError; use std::fmt::{Display, Formatter}; use std::io; @@ -21,11 +21,11 @@ impl ExitCode for CornError { fn get_exit_code(&self) -> i32 { match self { CornError::Io(_) => 3, - CornError::ParserError(_) => 1, - CornError::InputResolveError(_) => 2, - CornError::InvalidPathError(_) => 6, - CornError::InvalidSpreadError(_) => 7, - CornError::InvalidInterpolationError(_) => 8, + // CornError::ParserError(_) => 1, + // CornError::InputResolveError(_) => 2, + // CornError::InvalidPathError(_) => 6, + // CornError::InvalidSpreadError(_) => 7, + // CornError::InvalidInterpolationError(_) => 8, CornError::DeserializationError(_) => 5, } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 71e40cb..d00d7a8 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,4 +1,4 @@ -use corn::{parse, Value}; +use corn::Value; use std::io::Read; use std::process::exit; use std::{fs, io}; @@ -47,7 +47,7 @@ fn main() { Ok(unparsed_file) => { let output_type = get_output_type(args.output_type); - match parse(&unparsed_file) { + match corn::from_str(&unparsed_file) { Ok(config) => match serialize(&config, output_type) { Ok(serialized) => println!("{serialized}"), Err(err) => handle_err(&err), diff --git a/src/de.rs b/src/de.rs index 8cade85..d949003 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,543 +1,363 @@ -use std::collections::VecDeque; +use std::collections::HashMap; -use serde::de::{self, DeserializeSeed, EnumAccess, IntoDeserializer, VariantAccess, Visitor}; +use serde::de; -use crate::error::{Error, Result}; -use crate::parse; -use crate::Value; +use crate::{Error, Value}; -#[derive(Debug)] +#[derive(Clone)] pub struct Deserializer<'de> { - value: Option>, + bytes: &'de [u8], + index: usize, + variables: HashMap, } -impl<'de> Deserializer<'de> { - pub fn from_str(input: &'de str) -> Result { - let parsed = parse(input)?; - - Ok(Self::from_value(parsed)) - } - - fn from_value(value: Value<'de>) -> Self { - Self { value: Some(value) } +impl Deserializer<'_> { + pub fn from_str(input: &str) -> Self { + todo!() } } -/// Attempts to deserialize the config from a string slice. -/// -/// # Errors -/// -/// Will return a `DeserializationError` if the config is invalid. -pub fn from_str(s: &str) -> Result +pub fn from_str<'a, T>(s: &'a str) -> Result where - T: de::DeserializeOwned, + T: de::Deserialize<'a>, { - let mut deserializer = Deserializer::from_str(s)?; - T::deserialize(&mut deserializer) -} + let mut deserializer = Deserializer::from_str(s); -/// Attempts to deserialize the config from a byte slice. -/// -/// # Errors -/// -/// Will return a `DeserializationError` if the config is invalid. -pub fn from_slice(bytes: &[u8]) -> Result -where - T: de::DeserializeOwned, -{ - match std::str::from_utf8(bytes) { - Ok(s) => from_str(s), - Err(e) => Err(Error::DeserializationError(e.to_string())), - } -} - -macro_rules! get_value { - ($self:ident) => { - match $self.value.take() { - Some(val) => Ok(val), - None => Err(Error::DeserializationError(String::from( - "Deserializer value unexpectedly `None`", - ))), - }? - }; -} - -macro_rules! err_expected { - ($expected:literal, $got:expr) => { - Err(Error::DeserializationError(format!( - "Expected {}, found '{:?}'", - $expected, $got - ))) - }; -} - -macro_rules! match_value { - ($self:ident, $name:literal, $($pat:pat => $expr:expr)+) => {{ - let value = get_value!($self); - match value { - $($pat => $expr, )+ - _ => err_expected!($name, value) - } - }}; + T::deserialize(&mut deserializer) } impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { type Error = Error; - fn deserialize_any(self, visitor: V) -> std::result::Result + fn deserialize_any(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Object(_) => { - let map = Map::new(value); - visitor.visit_map(map) - } - Value::Array(_) => { - let seq = Seq::new(value); - visitor.visit_seq(seq) - } - Value::String(val) => visitor.visit_str(&val), - Value::Integer(val) => visitor.visit_i64(val), - Value::Float(val) => visitor.visit_f64(val), - Value::Boolean(val) => visitor.visit_bool(val), - Value::Null(_) => visitor.visit_unit(), - } + todo!() } - fn deserialize_bool(self, visitor: V) -> std::result::Result + fn deserialize_bool(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "boolean", Value::Boolean(val) => visitor.visit_bool(val)) + todo!() } - fn deserialize_i8(self, visitor: V) -> std::result::Result + fn deserialize_i8(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (i8)", Value::Integer(val) => visitor.visit_i8(val as i8)) + todo!() } - fn deserialize_i16(self, visitor: V) -> std::result::Result + fn deserialize_i16(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (i16)", Value::Integer(val) => visitor.visit_i16(val as i16)) + todo!() } - fn deserialize_i32(self, visitor: V) -> std::result::Result + fn deserialize_i32(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (i32)", Value::Integer(val) => visitor.visit_i32(val as i32)) + todo!() } - fn deserialize_i64(self, visitor: V) -> std::result::Result + fn deserialize_i64(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (i64)", Value::Integer(val) => visitor.visit_i64(val)) + todo!() } - fn deserialize_u8(self, visitor: V) -> std::result::Result + fn deserialize_u8(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u8)", Value::Integer(val) => visitor.visit_u8(val as u8)) + todo!() } - fn deserialize_u16(self, visitor: V) -> std::result::Result + fn deserialize_u16(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u16)", Value::Integer(val) => visitor.visit_u16(val as u16)) + todo!() } - fn deserialize_u32(self, visitor: V) -> std::result::Result + fn deserialize_u32(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u32)", Value::Integer(val) => visitor.visit_u32(val as u32)) + todo!() } - fn deserialize_u64(self, visitor: V) -> std::result::Result + fn deserialize_u64(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "integer (u64)", Value::Integer(val) => visitor.visit_u64(val as u64)) + todo!() } - fn deserialize_f32(self, visitor: V) -> std::result::Result + fn deserialize_f32(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "float (f32)", Value::Float(val) => visitor.visit_f32(val as f32)) + todo!() } - fn deserialize_f64(self, visitor: V) -> std::result::Result + fn deserialize_f64(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "float (f64)", Value::Float(val) => visitor.visit_f64(val)) + todo!() } - fn deserialize_char(self, visitor: V) -> std::result::Result + fn deserialize_char(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - let char = match value { - Value::String(value) => value.chars().next(), - _ => return err_expected!("char", value), - }; - - match char { - Some(char) => visitor.visit_char(char), - None => err_expected!("char", "empty string"), - } + todo!() } - fn deserialize_str(self, visitor: V) -> std::result::Result + fn deserialize_str(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "string", - Value::String(val) => visitor.visit_str(&val) - ) + todo!() } - fn deserialize_string(self, visitor: V) -> std::result::Result + fn deserialize_string(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_str(visitor) + todo!() } - fn deserialize_bytes(self, visitor: V) -> std::result::Result + fn deserialize_bytes(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - match_value!(self, "bytes array", - Value::String(val) => visitor.visit_bytes(val.as_bytes()) - ) + todo!() } - fn deserialize_byte_buf(self, visitor: V) -> std::result::Result + fn deserialize_byte_buf(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_bytes(visitor) + todo!() } - fn deserialize_option(self, visitor: V) -> std::result::Result + fn deserialize_option(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Null(_) => visitor.visit_none(), - _ => visitor.visit_some(&mut Deserializer::from_value(value)), - } + todo!() } - fn deserialize_unit(self, visitor: V) -> std::result::Result + fn deserialize_unit(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - visitor.visit_unit() + todo!() } fn deserialize_unit_struct( self, - _name: &'static str, + name: &'static str, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_unit(visitor) + todo!() } fn deserialize_newtype_struct( self, - _name: &'static str, + name: &'static str, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - visitor.visit_newtype_struct(self) + todo!() } - fn deserialize_seq(self, visitor: V) -> std::result::Result + fn deserialize_seq(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Array(_) => visitor.visit_seq(Seq::new(value)), - _ => err_expected!("array", value), - } + todo!() } - fn deserialize_tuple( - self, - _len: usize, - visitor: V, - ) -> std::result::Result + fn deserialize_tuple(self, len: usize, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_seq(visitor) + todo!() } fn deserialize_tuple_struct( self, - _name: &'static str, - _len: usize, + name: &'static str, + len: usize, visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_seq(visitor) + todo!() } - fn deserialize_map(self, visitor: V) -> std::result::Result + fn deserialize_map(self, visitor: V) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - let value = get_value!(self); - match value { - Value::Object(_) => visitor.visit_map(Map::new(value)), - _ => err_expected!("object", value), - } + todo!() } fn deserialize_struct( self, - _name: &'static str, - _fields: &'static [&'static str], + name: &'static str, + fields: &'static [&'static str], visitor: V, - ) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_map(visitor) + todo!() } fn deserialize_enum( self, - _name: &'static str, - _variants: &'static [&'static str], + name: &'static str, + variants: &'static [&'static str], visitor: V, - ) -> std::result::Result - where - V: Visitor<'de>, - { - let value = get_value!(self); - match value { - Value::Object(_) => visitor.visit_enum(Enum::new(value)), - Value::String(val) => visitor.visit_enum(val.into_deserializer()), - _ => err_expected!("object or string (enum variant)", value), - } - } - - fn deserialize_identifier(self, visitor: V) -> std::result::Result + ) -> Result where - V: Visitor<'de>, + V: de::Visitor<'de>, { - self.deserialize_str(visitor) - } - - fn deserialize_ignored_any(self, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - self.deserialize_any(visitor) - } -} - -struct Map<'de> { - values: VecDeque>, -} - -impl<'de> Map<'de> { - fn new(value: Value<'de>) -> Self { - match value { - Value::Object(values) => Self { - values: values - .into_iter() - .flat_map(|(key, value)| vec![Value::String(key), value]) - .collect(), - }, - _ => unreachable!(), - } - } -} - -impl<'de> de::MapAccess<'de> for Map<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> std::result::Result, Self::Error> - where - K: DeserializeSeed<'de>, - { - if let Some(value) = self.values.pop_front() { - seed.deserialize(&mut Deserializer::from_value(value)) - .map(Some) - } else { - Ok(None) - } - } - - fn next_value_seed(&mut self, seed: V) -> std::result::Result - where - V: DeserializeSeed<'de>, - { - match self.values.pop_front() { - Some(value) => seed.deserialize(&mut Deserializer::from_value(value)), - None => Err(Error::DeserializationError( - "Expected value to exist".to_string(), - )), - } - } - - fn size_hint(&self) -> Option { - Some(self.values.len() / 2) + todo!() } -} - -struct Seq<'de> { - values: VecDeque>, -} -impl<'de> Seq<'de> { - fn new(value: Value<'de>) -> Self { - match value { - Value::Array(values) => Self { - values: VecDeque::from(values), - }, - _ => unreachable!(), - } - } -} - -impl<'de> de::SeqAccess<'de> for Seq<'de> { - type Error = Error; - - fn next_element_seed( - &mut self, - seed: T, - ) -> std::result::Result, Self::Error> + fn deserialize_identifier(self, visitor: V) -> Result where - T: DeserializeSeed<'de>, + V: de::Visitor<'de>, { - if let Some(value) = self.values.pop_front() { - seed.deserialize(&mut Deserializer::from_value(value)) - .map(Some) - } else { - Ok(None) - } + todo!() } - fn size_hint(&self) -> Option { - Some(self.values.len()) - } -} - -struct Enum<'de> { - value: Value<'de>, -} - -impl<'de> Enum<'de> { - fn new(value: Value<'de>) -> Self { - Self { value } - } -} - -impl<'de> EnumAccess<'de> for Enum<'de> { - type Error = Error; - type Variant = Variant<'de>; - - fn variant_seed(self, seed: V) -> std::result::Result<(V::Value, Self::Variant), Self::Error> + fn deserialize_ignored_any(self, visitor: V) -> Result where - V: DeserializeSeed<'de>, + V: de::Visitor<'de>, { - match self.value { - Value::String(_) => { - let value = seed.deserialize(&mut Deserializer::from_value(self.value))?; - Ok((value, Variant::new(None))) - } - Value::Object(obj) => { - let first_pair = obj.into_iter().next(); - if let Some(first_pair) = first_pair { - let value = Value::String(first_pair.0); - let tag = seed.deserialize(&mut Deserializer::from_value(value))?; - Ok((tag, Variant::new(Some(first_pair.1)))) - } else { - Err(Error::DeserializationError( - "Cannot deserialize empty object into enum".to_string(), - )) - } - } - _ => unreachable!(), - } - } -} - -struct Variant<'de> { - value: Option>, -} - -impl<'de> Variant<'de> { - fn new(value: Option>) -> Self { - Self { value } + todo!() } } -impl<'de> VariantAccess<'de> for Variant<'de> { - type Error = Error; - - fn unit_variant(self) -> std::result::Result<(), Self::Error> { - Ok(()) - } - - fn newtype_variant_seed(self, seed: T) -> std::result::Result - where - T: DeserializeSeed<'de>, - { - match self.value { - Some(value) => seed.deserialize(&mut Deserializer::from_value(value)), - None => Err(Error::DeserializationError( - "Expected value to exist".to_string(), - )), - } - } - - fn tuple_variant(self, _len: usize, visitor: V) -> std::result::Result - where - V: Visitor<'de>, - { - match self.value { - Some(value) if matches!(value, Value::Array(_)) => visitor.visit_seq(Seq::new(value)), - _ => unreachable!(), - } - } - - fn struct_variant( - self, - _fields: &'static [&'static str], - visitor: V, - ) -> std::result::Result - where - V: Visitor<'de>, - { - match self.value { - Some(value) if matches!(value, Value::Object(_)) => visitor.visit_map(Map::new(value)), - _ => unreachable!(), - } - } -} +// impl Entry { +// pub const fn as_type(&self) -> &'static str { +// match self { +// Self::String(_) => todo!(), +// // Self::InterpolatedString(string_parts) => todo!(), +// Self::Integer(_) => todo!(), +// Self::Float(_) => todo!(), +// Self::Boolean(_) => todo!(), +// Self::Object(object_entry) => todo!(), +// Self::Array(array_entry) => todo!(), +// Self::Input(_) => todo!(), +// Self::Null => todo!(), +// } +// } +// } + +// #[derive(Debug, Clone)] +// pub enum ObjectEntry { +// Flat(IndexMap), +// WithSpreads(Vec), +// } + +// #[derive(Debug, Clone)] +// pub enum ObjectPart { +// Pair(String, Entry), +// Spread(String), +// } + +// #[derive(Debug, Clone)] +// pub enum ArrayEntry { +// Flat(Vec), +// WithSpreads(Vec), +// } + +// /// Part of an array with spreads +// #[derive(Debug, Clone)] +// pub enum ArrayPart { +// Entry(Entry), +// Spread(String), +// } + +// /// Helpers for the parser +// #[derive(Debug, Clone)] +// pub(crate) enum SpreadOr { +// Spread(String), +// Other(T), +// } + +// pub(crate) fn pairs_to_object(pairs: Vec>) -> ObjectEntry { +// let has_spreads = pairs.iter().any(|p| matches!(p, SpreadOr::Spread(_))); + +// if has_spreads { +// let parts: Vec = pairs +// .into_iter() +// .map(|p| match p { +// SpreadOr::Other((k, v)) => ObjectPart::Pair(k, v), +// SpreadOr::Spread(name) => ObjectPart::Spread(name), +// }) +// .collect(); +// ObjectEntry::WithSpreads(parts) +// } else { +// let map: IndexMap = pairs +// .into_iter() +// .filter_map(|p| match p { +// SpreadOr::Other((k, v)) => Some((k, v)), +// _ => None, // This should never happen if has_spreads is false +// }) +// .collect(); +// ObjectEntry::Flat(map) +// } +// } + +// pub(crate) fn entries_to_array(entries: Vec>) -> ArrayEntry { +// let has_spreads = entries.iter().any(|e| matches!(e, SpreadOr::Spread(_))); + +// if has_spreads { +// let parts: Vec = entries +// .into_iter() +// .map(|e| match e { +// SpreadOr::Other(v) => ArrayPart::Entry(v), +// SpreadOr::Spread(name) => ArrayPart::Spread(name), +// }) +// .collect(); +// ArrayEntry::WithSpreads(parts) +// } else { +// let values: Vec = entries +// .into_iter() +// .filter_map(|e| match e { +// SpreadOr::Other(v) => Some(v), +// _ => None, // This should never happen if has_spreads is false +// }) +// .collect(); +// ArrayEntry::Flat(values) +// } +// } + +// // pub(crate) fn create_nested_entry(keys: Vec, value: Entry) -> Entry { +// // let mut current = value; + +// // for key in keys.into_iter().rev() { +// // current = Entry::Object(ObjectEntry::Flat(indexmap! {key => current})); +// // } + +// // current +// // } diff --git a/src/error.rs b/src/error.rs index cf0e631..5a84080 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,34 +1,38 @@ -use std::fmt::{Debug, Display}; -use thiserror::Error; +use std::fmt::Display; -use crate::Rule; +pub type Result = std::result::Result; -pub type Result = std::result::Result; - -#[derive(Error, Debug)] +#[derive(Debug)] pub enum Error { - #[error(transparent)] - Io(#[from] std::io::Error), - - #[error(transparent)] - ParserError(#[from] Box>), + Io(std::io::Error), - #[error("failed to resolve referenced input `{0}`")] - InputResolveError(String), + // #[error("failed to resolve referenced input `{0}`")] + // InputResolveError(String), - #[error("attempted to use dot-notation on non-object value at `{0}`")] - InvalidPathError(String), + // #[error("attempted to use dot-notation on non-object value at `{0}`")] + // InvalidPathError(String), - #[error("attempted to spread a type that differs from its containing type at `{0}`")] - InvalidSpreadError(String), + // #[error("attempted to spread a type that differs from its containing type at `{0}`")] + // InvalidSpreadError(String), - #[error("attempted to interpolate a non-string type into a string at `{0}`")] - InvalidInterpolationError(String), + // #[error("attempted to interpolate a non-string type into a string at `{0}`")] + // InvalidInterpolationError(String), - #[error("failed to deserialize input: {0}")] + // #[error("failed to deserialize input: {0}")] DeserializationError(String), } +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Io(e) => e.fmt(f), + Self::DeserializationError(s) => s.fmt(f), + } + } +} + +impl std::error::Error for Error {} + impl serde::de::Error for Error { fn custom(msg: T) -> Self where diff --git a/src/grammar.pest b/src/grammar.pest deleted file mode 100644 index 2cbe2c4..0000000 --- a/src/grammar.pest +++ /dev/null @@ -1,93 +0,0 @@ -WHITESPACE = _{ " " | "\t" | "\r" | "\n" } -COMMENT = _{ "//" ~ (!"\n" ~ ANY)* } - -object = { - "{" - ~ object_value* - ~ "}" -} - -object_value = _{ - pair | spread -} - -spread = { - ".." ~ input -} - -array = { - "[" - ~ array_value* - ~ "]" -} - -array_value = _{ - value | spread -} - -pair = { path ~ "=" ~ value } - -path = ${ - path_seg - ~ ( "." ~ path_seg )* -} - -path_seg = _{ - quoted_path_seg | regular_path_seg -} - -quoted_path_seg = ${ "'" ~ quoted_path_val ~ "'" } -quoted_path_val = ${ quoted_path_char + } -quoted_path_char = _{ - !("'" | "\\") ~ ANY - | "\\" ~ "'" -} - -regular_path_seg = ${ path_char + } - -path_char = _{ !( WHITESPACE | "=" | "." ) ~ ANY } - -value = _{ object | array | input | string | float | integer | boolean | null } - -boolean = { "true" | "false" } -null = { "null" } - -string = ${ - "\"" ~ string_val ~ "\"" -} - -string_val = ${ (input | char)* } - -char = { - !("\"" | "\\") ~ ANY - | "\\" ~ ("\"" | "\\" | "n" | "r" | "t" | "$") - | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) -} - -integer = ${ - hex_integer | decimal_integer -} - -decimal_integer = @{ - "-"? - ~ ("0" | ASCII_NONZERO_DIGIT ~ ("_"? ~ ASCII_DIGIT)*) -} - -hex_integer = @{ - "0x" ~ ASCII_HEX_DIGIT+ -} - -float = @{ - "-"? - ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) - ~ ("." ~ ASCII_DIGIT*) - ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+)? -} - -input = ${ !"\\" ~ "$" ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")+ } - -assignment = { input ~ "=" ~ value } - -assign_block = { "let" ~ "{" ~ assignment* ~ "}" ~ "in" } - -config = _{ SOI ~ assign_block? ~ object ~ EOI } diff --git a/src/lib.rs b/src/lib.rs index f06dfb9..608a85f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,16 +1,3 @@ -use indexmap::IndexMap; -use serde::Serialize; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::{Display, Formatter}; - -pub use crate::de::{from_slice, from_str}; -pub use crate::parser::{parse, Rule}; - -pub mod error; -mod parser; - -mod de; #[cfg(any( feature = "lua51", feature = "lua52", @@ -20,53 +7,11 @@ mod de; feature = "luajit52" ))] mod lua; -#[cfg(feature = "wasm")] -mod wasm; -/// A map of input names and values. -/// The names include their `$` prefix. -pub type Inputs<'a> = HashMap<&'a str, Value<'a>>; - -/// A map of keys to their values. -pub type Object<'a> = IndexMap, Value<'a>>; - -#[derive(Serialize, Debug, Clone)] -#[serde(untagged)] -pub enum Value<'a> { - /// Key/value map. Values can be mixed types. - Object(Object<'a>), - /// Array of values, can be mixed types. - Array(Vec>), - /// UTF-8 string - String(Cow<'a, str>), - /// 64-bit signed integer. - Integer(i64), - /// 64-bit (double precision) floating point number. - Float(f64), - /// true or false - Boolean(bool), - /// `null` literal. - /// - /// Takes an optional unit type as the `toml` crate - /// errors when encountering unit types, - /// but can handle `None` types. - Null(Option<()>), -} +mod de; +mod error; +mod value; -impl Display for Value<'_> { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match self { - Value::Object(_) => "object", - Value::Array(_) => "array", - Value::String(_) => "string", - Value::Integer(_) => "integer", - Value::Float(_) => "float", - Value::Boolean(_) => "boolean", - Value::Null(_) => "null", - } - ) - } -} +pub use de::{from_str, Deserializer}; +pub use error::{Error, Result}; +pub use value::{Object, Value}; diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index 248743f..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,425 +0,0 @@ -use indexmap::IndexMap; -use std::borrow::Cow; -use std::collections::HashMap; -use std::env::var; -use std::fmt::Formatter; - -use pest::iterators::Pair; -use pest::Parser; - -use crate::error::{Error, Result}; -use crate::{Inputs, Object, Value}; - -#[derive(pest_derive::Parser)] -#[grammar = "grammar.pest"] -pub struct AstParser; - -impl std::fmt::Display for Rule { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:?}") - } -} - -struct CornParser<'a> { - input_block: Option>, - inputs: Inputs<'a>, -} - -impl<'a> CornParser<'a> { - pub fn new(input_block: Option>) -> Self { - let inputs = HashMap::new(); - Self { - input_block, - inputs, - } - } - - pub fn parse(mut self, object_block: Pair<'a, Rule>) -> Result> { - if let Some(input_block) = self.input_block.take() { - self.parse_assign_block(input_block)?; - } - - let value_block = self.parse_object(object_block)?; - Ok(Value::Object(value_block)) - } - - /// Parses a pair of tokens (marked as a `Rule`) into a `Value`. - fn parse_value(&self, pair: Pair<'a, Rule>) -> Result> { - match pair.as_rule() { - Rule::object => Ok(Value::Object(self.parse_object(pair)?)), - Rule::array => Ok(Value::Array(self.parse_array(pair)?)), - Rule::string => Ok(Value::String(self.parse_string(pair)?)), - Rule::integer => Ok(Value::Integer(Self::parse_integer(pair))), - Rule::float => Ok(Value::Float(Self::parse_float(&pair))), - Rule::boolean => Ok(Value::Boolean(Self::parse_bool(&pair))), - Rule::null => Ok(Value::Null(None)), - Rule::input => { - let key = pair.as_str(); - self.get_input(key) - } - _ => unreachable!(), - } - } - - fn parse_bool(pair: &Pair<'_, Rule>) -> bool { - assert_eq!(pair.as_rule(), Rule::boolean); - match pair.as_str() { - "true" => true, - "false" => false, - _ => unreachable!(), - } - } - - fn parse_integer(pair: Pair<'_, Rule>) -> i64 { - assert_eq!(pair.as_rule(), Rule::integer); - let sub_pair = pair - .into_inner() - .next() - .expect("integers should contain a sub-rule of their type"); - - match sub_pair.as_rule() { - Rule::decimal_integer => sub_pair - .as_str() - .replace('_', "") - .parse() - .expect("decimal integer rules should match valid rust integers"), - Rule::hex_integer => i64::from_str_radix(&sub_pair.as_str()[2..], 16) - .expect("hex integer rules contain valid hex values"), - _ => unreachable!(), - } - } - - fn parse_float(pair: &Pair<'_, Rule>) -> f64 { - assert_eq!(pair.as_rule(), Rule::float); - pair.as_str() - .parse() - .expect("float rules should match valid rust floats") - } - - /// Collects each `char` in a `Rule::string` - /// to form a single `String`. - fn parse_string(&self, pair: Pair<'a, Rule>) -> Result> { - assert_eq!(pair.as_rule(), Rule::string); - - let mut full_string = String::new(); - - let pairs = pair - .into_inner() - .next() - .expect("string rules should contain a valid string value") - .into_inner(); - - for pair in pairs { - match pair.as_rule() { - Rule::char => full_string.push(Self::parse_char(&pair)), - Rule::input => { - let input_name = pair.as_str(); - let value = self.get_input(input_name)?; - match value { - Value::String(value) => full_string.push_str(&value), - _ => return Err(Error::InvalidInterpolationError(input_name.to_string())), - } - } - _ => unreachable!(), - }; - } - - let full_string = if full_string.contains('\n') { - trim_multiline_string(&full_string) - } else { - full_string - }; - - Ok(Cow::Owned(full_string)) - } - - fn parse_char(pair: &Pair<'a, Rule>) -> char { - let str = pair.as_str(); - let mut chars = str.chars(); - - let first_char = chars.next().expect("character to exist"); - if first_char != '\\' { - return first_char; - } - - let second_char = chars.next().expect("character to exist"); - if second_char != 'u' { - return match second_char { - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '"' => '\"', - '$' => '$', - '\\' => '\\', - _ => unreachable!(), - }; - } - - let num = - u32::from_str_radix(&str[3..], 16).expect("valid hex characters to exist after \\u"); - char::from_u32(num).unwrap_or('\u{FFFD}') - } - - /// Parses each rule in a `Rule::array` - /// to form a vector of `Value`s. - fn parse_array(&self, block: Pair<'a, Rule>) -> Result>> { - assert_eq!(block.as_rule(), Rule::array); - - let mut arr = vec![]; - - for pair in block.into_inner() { - match pair.as_rule() { - Rule::spread => { - let input = pair - .into_inner() - .next() - .expect("spread operators should contain an input"); - - let input_name = input.as_str(); - let value = self.parse_value(input)?; - - match value { - Value::Array(other) => arr.extend(other), - _ => return Err(Error::InvalidSpreadError(input_name.to_string())), - } - } - _ => arr.push(self.parse_value(pair)?), - }; - } - - Ok(arr) - } - - /// Parses each key/value pair in a `Rule::object` - /// to form a `IndexMap` of Values. - /// - /// An `IndexMap` is used to ensure keys - /// always output in the same order. - fn parse_object(&self, block: Pair<'a, Rule>) -> Result> { - assert_eq!(block.as_rule(), Rule::object); - - let mut obj = IndexMap::new(); - - for pair in block.into_inner() { - match pair.as_rule() { - Rule::pair => { - let mut path_rules = pair.into_inner(); - - let path = path_rules - .next() - .expect("object pairs should contain a key"); - - let paths = Self::parse_path(path); - - let value = self.parse_value( - path_rules - .next() - .expect("object pairs should contain a value"), - )?; - - obj = Self::add_at_path(obj, &paths, value)?; - } - Rule::spread => { - let input = pair - .into_inner() - .next() - .expect("spread operators should contain an input"); - - let input_name = input.as_str(); - let value = self.parse_value(input)?; - - match value { - Value::Object(other) => obj.extend(other), - _ => return Err(Error::InvalidSpreadError(input_name.to_string())), - } - } - _ => unreachable!(), - } - } - - Ok(obj) - } - - fn parse_path(path: Pair) -> Vec> { - path.into_inner() - .map(|pair| match pair.as_rule() { - Rule::regular_path_seg => Cow::Borrowed(pair.as_str()), - Rule::quoted_path_seg => Cow::Owned( - pair.into_inner() - .next() - .expect("quoted paths should contain an inner value") - .as_str() - .replace('\\', ""), - ), - _ => unreachable!(), - }) - .collect::>() - } - - /// Adds `Value` at the `path` in `obj`. - /// - /// `path` is an array where each entry represents another object key, - /// for example `foo.bar` is represented as `["foo", "bar"]`. - /// - /// Objects are created up to the required depth recursively. - fn add_at_path( - mut obj: Object<'a>, - path: &[Cow<'a, str>], - value: Value<'a>, - ) -> Result> { - let (part, path_rest) = path - .split_first() - .expect("paths should contain at least 1 segment"); - - if path_rest.is_empty() { - obj.insert(part.clone(), value); - return Ok(obj); - } - - let child_obj = obj - .shift_remove(part) - .unwrap_or_else(|| Value::Object(IndexMap::new())); - - match child_obj { - Value::Object(map) => { - obj.insert( - part.clone(), - Value::Object(Self::add_at_path(map, path_rest, value)?), - ); - - Ok(obj) - } - _ => Err(Error::InvalidPathError(path.join("."))), - } - } - - /// Parses the `let { } in` block at the start of files. - /// Each input is inserted into into `self.inputs`. - fn parse_assign_block(&mut self, block: Pair<'a, Rule>) -> Result<()> { - assert_eq!(block.as_rule(), Rule::assign_block); - - for pair in block.into_inner() { - let mut assign_rules = pair.into_inner(); - let name = assign_rules - .next() - .expect("input assignments should have a name") - .as_str(); - - let value = self.parse_value( - assign_rules - .next() - .expect("input assignments should have a value"), - )?; - - self.inputs.insert(name, value); - } - - Ok(()) - } - - /// Attempts to get an input value from the `inputs` map. - /// If the `key` starts with `$env_` the system environment variables will be consulted first. - fn get_input(&self, key: &'a str) -> Result> { - if let Some(env_name) = key.strip_prefix("$env_") { - let var = var(env_name); - - if let Ok(var) = var { - return Ok(Value::String(Cow::Owned(var))); - } - } - - if let Some(value) = self.inputs.get(key) { - Ok(value.clone()) - } else { - Err(Error::InputResolveError(key.to_string())) - } - } -} - -/// Takes a multiline string and trims the maximum amount of -/// whitespace at the start of each line -/// while preserving formatting. -/// -/// Based on code from `indoc` crate: -/// -fn trim_multiline_string(string: &str) -> String { - let ignore_first_line = string.starts_with('\n') || string.starts_with("\r\n"); - - let spaces = string - .lines() - .skip(1) - .map(|line| line.chars().take_while(char::is_ascii_whitespace).count()) - .min() - .unwrap_or_default(); - - let mut result = String::with_capacity(string.len()); - for (i, line) in string.lines().enumerate() { - if i > 1 || (i == 1 && !ignore_first_line) { - result.push('\n'); - } - if i == 0 { - // Do not un-indent anything on same line as opening quote - result.push_str(line); - } else if line.len() > spaces { - // Whitespace-only lines may have fewer than the number of spaces - // being removed - result.push_str(&line[spaces..]); - } - } - result -} - -/// Parses the input string into a `Config` -/// containing the resolved inputs -/// and a map of values representing the top-level object. -/// -/// # Examples -/// -/// ```rust -/// use corn::parse; -/// -/// let corn = "{foo = 42}"; -/// -/// let config = parse(corn).unwrap(); -/// let json = serde_json::to_string(&config).unwrap(); -/// -/// assert_eq!(json, "{\"foo\":42}"); -/// ``` -/// -/// # Errors -/// -/// Will fail if the input contains a syntax error. -/// Will fail if the input contains invalid Corn for another reason, -/// including references to undefined inputs or dot-notation for non-object values. -/// Will fail if the input cannot be deserialized for any reaon. -/// -/// Any of the above will return a specific error type with details. -/// -/// # Panics -/// -/// If the internal AST parser produces a tree in an invalid structure, -/// the function will panic. -/// This indicates a severe error in the library and should never occur. -pub fn parse(file: &str) -> Result { - let rules = AstParser::parse(Rule::config, file); - - match rules { - Ok(mut rules) => { - let first_block = rules.next().expect("should be at least 1 rule"); - - match first_block.as_rule() { - Rule::assign_block => { - let parser = CornParser::new(Some(first_block)); - let object_block = rules.next().expect("should always be an object block"); - parser.parse(object_block) - } - Rule::object => { - let parser = CornParser::new(None); - parser.parse(first_block) - } - _ => unreachable!(), - } - } - Err(error) => Err(Error::ParserError(Box::new(error))), - } -} diff --git a/src/value.rs b/src/value.rs new file mode 100644 index 0000000..e12d1b8 --- /dev/null +++ b/src/value.rs @@ -0,0 +1,34 @@ +use indexmap::IndexMap; +use serde::{Deserialize, Serialize}; + +pub type Object = IndexMap; + +#[derive(Debug, Clone)] +pub enum Value { + String(String), + Integer(i64), + Float(f64), + Boolean(bool), + Object(Object), + Array(Vec), + Input(String), + Null, +} + +impl<'de> Deserialize<'de> for Value { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + todo!() + } +} + +impl Serialize for Value { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + todo!() + } +} From 7d6dd41b159366a9892955b398d45baff8980a7f Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 28 Mar 2025 11:30:38 +0100 Subject: [PATCH 03/26] Fix lua implementation --- src/lua.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lua.rs b/src/lua.rs index 539b992..c6ae350 100644 --- a/src/lua.rs +++ b/src/lua.rs @@ -1,14 +1,14 @@ use crate::Value; use mlua::prelude::*; -impl IntoLua for Value<'_> { +impl IntoLua for Value { fn into_lua(self, lua: &Lua) -> LuaResult { lua.to_value(&self) } } fn lua_parse(lua: &Lua, config: String) -> LuaResult { - let res = crate::parse(&config); + let res = crate::from_str::(&config); match res { Ok(v) => Ok(lua.to_value(&v)?), Err(e) => Err(LuaError::RuntimeError(e.to_string())), From 935f6c47a83096ac057ba3f93d4247e4f464be77 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 28 Mar 2025 11:30:59 +0100 Subject: [PATCH 04/26] Fix manifest --- Cargo.lock | 7 +++++++ Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 40e37d2..8bbd964 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -410,6 +410,7 @@ dependencies = [ "criterion", "indexmap", "mlua", + "paste", "serde", "serde_bytes", "serde_json", @@ -547,6 +548,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pkg-config" version = "0.3.32" diff --git a/Cargo.toml b/Cargo.toml index b5ede3f..e66a905 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,6 @@ mlua = { version = "0.10.3", features = [ "serialize", ], optional = true } - [dev-dependencies] wasm-bindgen-test = { version = "0.3.50" } serde_json = "1.0.140" @@ -54,6 +53,7 @@ serde_bytes = "0.11.17" toml_edit = { version = "0.22.24", features = ["serde"] } anyhow = "1.0.97" criterion = { version = "0.5.1", features = ["html_reports"] } +paste = "1.0.15" [profile.release] lto = true From ace884efe8ae57fc3be5ad043f53d68faaca7b47 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:12:43 +0100 Subject: [PATCH 05/26] Implement custom deserializer and serializer for Value --- src/value.rs | 121 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 5 deletions(-) diff --git a/src/value.rs b/src/value.rs index e12d1b8..e68121b 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1,17 +1,16 @@ use indexmap::IndexMap; -use serde::{Deserialize, Serialize}; +use serde::{de::Visitor, Deserialize, Serialize}; pub type Object = IndexMap; #[derive(Debug, Clone)] pub enum Value { String(String), - Integer(i64), + Integer(i64), // FIXME: Use a custom number wrapper to handle both signed and unsigned integers Float(f64), Boolean(bool), Object(Object), Array(Vec), - Input(String), Null, } @@ -20,15 +19,127 @@ impl<'de> Deserialize<'de> for Value { where D: serde::Deserializer<'de>, { - todo!() + struct ValueVisitor; + + impl<'de> Visitor<'de> for ValueVisitor { + type Value = Value; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("any valid Corn value") + } + + #[inline] + fn visit_bool(self, v: bool) -> Result + where + E: serde::de::Error, + { + Ok(Value::Boolean(v)) + } + + #[inline] + fn visit_i64(self, v: i64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Integer(v)) + } + + fn visit_f64(self, v: f64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Float(v)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + self.visit_string(String::from(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: serde::de::Error, + { + Ok(Value::String(v)) + } + + fn visit_none(self) -> Result + where + E: serde::de::Error, + { + Ok(Value::Null) + } + + fn visit_some(self, deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + Deserialize::deserialize(deserializer) + } + + fn visit_unit(self) -> Result + where + E: serde::de::Error, + { + Ok(Value::Null) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::new(); + + while let Some(elem) = seq.next_element()? { + vec.push(elem); + } + + Ok(Value::Array(vec)) + } + + fn visit_map(self, mut map: A) -> Result + where + A: serde::de::MapAccess<'de>, + { + let mut dict = Object::new(); + + while let Some((key, value)) = map.next_entry()? { + dict.insert(key, value); + } + + Ok(Value::Object(dict)) + } + } + + deserializer.deserialize_any(ValueVisitor) } } impl Serialize for Value { + #[inline] fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - todo!() + match self { + Self::String(s) => serializer.serialize_str(s), + Self::Integer(i) => i.serialize(serializer), + Self::Float(f) => f.serialize(serializer), + Self::Boolean(v) => serializer.serialize_bool(*v), + Self::Object(obj) => { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(obj.len()))?; + + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + + map.end() + } + Self::Array(v) => v.serialize(serializer), + Self::Null => serializer.serialize_unit(), + } } } From 55324b413e1da5ee93ee9c2c2abb113f563b85ef Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:17:09 +0100 Subject: [PATCH 06/26] Move value to its own module --- src/{value.rs => value/de.rs} | 45 +++-------------------------------- src/value/mod.rs | 17 +++++++++++++ src/value/ser.rs | 30 +++++++++++++++++++++++ 3 files changed, 50 insertions(+), 42 deletions(-) rename src/{value.rs => value/de.rs} (70%) create mode 100644 src/value/mod.rs create mode 100644 src/value/ser.rs diff --git a/src/value.rs b/src/value/de.rs similarity index 70% rename from src/value.rs rename to src/value/de.rs index e68121b..026860e 100644 --- a/src/value.rs +++ b/src/value/de.rs @@ -1,18 +1,6 @@ -use indexmap::IndexMap; -use serde::{de::Visitor, Deserialize, Serialize}; - -pub type Object = IndexMap; - -#[derive(Debug, Clone)] -pub enum Value { - String(String), - Integer(i64), // FIXME: Use a custom number wrapper to handle both signed and unsigned integers - Float(f64), - Boolean(bool), - Object(Object), - Array(Vec), - Null, -} +use serde::{de::Visitor, Deserialize}; + +use crate::{Object, Value}; impl<'de> Deserialize<'de> for Value { fn deserialize(deserializer: D) -> Result @@ -116,30 +104,3 @@ impl<'de> Deserialize<'de> for Value { deserializer.deserialize_any(ValueVisitor) } } - -impl Serialize for Value { - #[inline] - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - match self { - Self::String(s) => serializer.serialize_str(s), - Self::Integer(i) => i.serialize(serializer), - Self::Float(f) => f.serialize(serializer), - Self::Boolean(v) => serializer.serialize_bool(*v), - Self::Object(obj) => { - use serde::ser::SerializeMap; - let mut map = serializer.serialize_map(Some(obj.len()))?; - - for (k, v) in obj { - map.serialize_entry(k, v)?; - } - - map.end() - } - Self::Array(v) => v.serialize(serializer), - Self::Null => serializer.serialize_unit(), - } - } -} diff --git a/src/value/mod.rs b/src/value/mod.rs new file mode 100644 index 0000000..b80e703 --- /dev/null +++ b/src/value/mod.rs @@ -0,0 +1,17 @@ +use indexmap::IndexMap; + +mod de; +mod ser; + +pub type Object = IndexMap; + +#[derive(Debug, Clone)] +pub enum Value { + String(String), + Integer(i64), // FIXME: Use a custom number wrapper to handle both signed and unsigned integers + Float(f64), + Boolean(bool), + Object(Object), + Array(Vec), + Null, +} diff --git a/src/value/ser.rs b/src/value/ser.rs new file mode 100644 index 0000000..65abb5b --- /dev/null +++ b/src/value/ser.rs @@ -0,0 +1,30 @@ +use serde::Serialize; + +use crate::Value; + +impl Serialize for Value { + #[inline] + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Self::String(s) => serializer.serialize_str(s), + Self::Integer(i) => i.serialize(serializer), + Self::Float(f) => f.serialize(serializer), + Self::Boolean(v) => serializer.serialize_bool(*v), + Self::Object(obj) => { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(Some(obj.len()))?; + + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + + map.end() + } + Self::Array(v) => v.serialize(serializer), + Self::Null => serializer.serialize_unit(), + } + } +} From 05a910c0aace30070305e36b902a0fd964745c9a Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:22:00 +0100 Subject: [PATCH 07/26] Implement a custom integer type --- Cargo.lock | 1 + Cargo.toml | 1 + src/value/de.rs | 10 ++++- src/value/integer.rs | 89 ++++++++++++++++++++++++++++++++++++++++++++ src/value/mod.rs | 5 ++- 5 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 src/value/integer.rs diff --git a/Cargo.lock b/Cargo.lock index 8bbd964..268861f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -409,6 +409,7 @@ dependencies = [ "anyhow", "criterion", "indexmap", + "itoa", "mlua", "paste", "serde", diff --git a/Cargo.toml b/Cargo.toml index e66a905..2cc0366 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ crate-type = ["cdylib", "rlib"] # Core serde = "1.0.219" indexmap = { version = "2.8.0", features = ["serde"] } +itoa = "1.0.15" # # WASM support (optional) # wasm-bindgen = { version = "0.2.100", optional = true } diff --git a/src/value/de.rs b/src/value/de.rs index 026860e..5b80b29 100644 --- a/src/value/de.rs +++ b/src/value/de.rs @@ -29,7 +29,15 @@ impl<'de> Deserialize<'de> for Value { where E: serde::de::Error, { - Ok(Value::Integer(v)) + Ok(Value::Integer(v.into())) + } + + #[inline] + fn visit_u64(self, v: u64) -> Result + where + E: serde::de::Error, + { + Ok(Value::Integer(v.into())) } fn visit_f64(self, v: f64) -> Result diff --git a/src/value/integer.rs b/src/value/integer.rs new file mode 100644 index 0000000..0d87df9 --- /dev/null +++ b/src/value/integer.rs @@ -0,0 +1,89 @@ +use serde::Serialize; +use std::fmt::{Debug, Display}; + +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Integer { + inner: IntegerType, +} + +impl Debug for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Integer({})", self) + } +} + +impl Display for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.inner { + IntegerType::Negative(n) => f.write_str(itoa::Buffer::new().format(n)), + IntegerType::Positive(n) => f.write_str(itoa::Buffer::new().format(n)), + } + } +} + +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] +enum IntegerType { + Negative(i64), + Positive(u64), +} + +impl From for Integer { + fn from(value: i64) -> Self { + Self { + inner: IntegerType::Negative(value), + } + } +} + +impl From for Integer { + fn from(value: u64) -> Self { + Self { + inner: IntegerType::Positive(value), + } + } +} + +impl Integer { + pub const fn is_i64(&self) -> bool { + match self.inner { + IntegerType::Positive(n) => n <= i64::max_value() as u64, + IntegerType::Negative(_) => true, + } + } + + pub const fn is_u64(&self) -> bool { + matches!(self.inner, IntegerType::Positive(_)) + } + + pub const fn as_i64(&self) -> Option { + match self.inner { + IntegerType::Negative(n) => Some(n), + IntegerType::Positive(n) => { + if n <= i64::MAX as u64 { + Some(n as i64) + } else { + None + } + } + } + } + + pub const fn as_u64(&self) -> Option { + match self.inner { + IntegerType::Positive(n) => Some(n), + IntegerType::Negative(_) => None, + } + } +} + +impl Serialize for Integer { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self.inner { + IntegerType::Negative(integer) => serializer.serialize_i64(integer), + IntegerType::Positive(integer) => serializer.serialize_u64(integer), + } + } +} diff --git a/src/value/mod.rs b/src/value/mod.rs index b80e703..363bdeb 100644 --- a/src/value/mod.rs +++ b/src/value/mod.rs @@ -1,14 +1,17 @@ use indexmap::IndexMap; mod de; +mod integer; mod ser; +pub use integer::Integer; + pub type Object = IndexMap; #[derive(Debug, Clone)] pub enum Value { String(String), - Integer(i64), // FIXME: Use a custom number wrapper to handle both signed and unsigned integers + Integer(Integer), Float(f64), Boolean(bool), Object(Object), From 68ea73cdfb8561126b98c9897880f2f326aacd7e Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:27:46 +0100 Subject: [PATCH 08/26] Implement value helpers --- src/value/mod.rs | 241 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 241 insertions(+) diff --git a/src/value/mod.rs b/src/value/mod.rs index 363bdeb..27e9448 100644 --- a/src/value/mod.rs +++ b/src/value/mod.rs @@ -6,15 +6,256 @@ mod ser; pub use integer::Integer; +/// Object: Key-value collection that preserves insertion order pub type Object = IndexMap; +/// Represents a Corn configuration value. +/// +/// This enum encompasses all possible value types in the Corn language specification: +/// - String: UTF-8 string values +/// - Integer: 64-bit signed integers +/// - Float: 64-bit floating point numbers +/// - Boolean: true/false values +/// - Object: Key-value collection that preserves insertion order +/// - Array: Ordered collections of values +/// - Null: Represents absence of a value #[derive(Debug, Clone)] pub enum Value { + /// A UTF-8 string value String(String), + /// A 64-bit signed integer Integer(Integer), + /// A 64-bit floating point number Float(f64), + /// A boolean value (true or false) Boolean(bool), + /// A key-value collection that preserves insertion order Object(Object), + /// An ordered collection of values Array(Vec), + /// Represents the absence of a value Null, } + +impl Value { + /// Returns true if the value is a String. + pub const fn is_string(&self) -> bool { + matches!(self, Self::String(_)) + } + + /// Returns true if the value is an Integer. + pub const fn is_integer(&self) -> bool { + matches!(self, Self::Integer(_)) + } + + /// Returns true if the value is a Float. + pub const fn is_float(&self) -> bool { + matches!(self, Self::Float(_)) + } + + /// Returns true if the value is a Boolean. + pub const fn is_boolean(&self) -> bool { + matches!(self, Self::Boolean(_)) + } + + /// Returns true if the value is an Object. + pub const fn is_object(&self) -> bool { + matches!(self, Self::Object(_)) + } + + /// Returns true if the value is an Array. + pub const fn is_array(&self) -> bool { + matches!(self, Self::Array(_)) + } + + /// Returns true if the value is Null. + pub const fn is_null(&self) -> bool { + matches!(self, Self::Null) + } + + /// Returns the inner String if this value is a String, otherwise None. + pub fn as_string(&self) -> Option<&String> { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Returns the inner Integer if this value is an Integer, otherwise None. + pub const fn as_integer(&self) -> Option<&Integer> { + match self { + Self::Integer(integer) => Some(integer), + _ => None, + } + } + + /// Returns the inner Float if this value is a Float, otherwise None. + pub const fn as_float(&self) -> Option<&f64> { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Returns the inner Boolean if this value is a Boolean, otherwise None. + pub const fn as_boolean(&self) -> Option<&bool> { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Returns the inner Object if this value is an Object, otherwise None. + pub fn as_object(&self) -> Option<&Object> { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Returns the inner Array if this value is an Array, otherwise None. + pub fn as_array(&self) -> Option<&Vec> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Returns a mutable reference to the inner String if this value is a String, otherwise None. + pub fn as_string_mut(&mut self) -> Option<&mut String> { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Returns a mutable reference to the inner Integer if this value is an Integer, otherwise None. + pub fn as_integer_mut(&mut self) -> Option<&mut Integer> { + match self { + Self::Integer(i) => Some(i), + _ => None, + } + } + + /// Returns a mutable reference to the inner Float if this value is a Float, otherwise None. + pub fn as_float_mut(&mut self) -> Option<&mut f64> { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Returns a mutable reference to the inner Boolean if this value is a Boolean, otherwise None. + pub fn as_boolean_mut(&mut self) -> Option<&mut bool> { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Returns a mutable reference to the inner Object if this value is an Object, otherwise None. + pub fn as_object_mut(&mut self) -> Option<&mut Object> { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Returns a mutable reference to the inner Array if this value is an Array, otherwise None. + pub fn as_array_mut(&mut self) -> Option<&mut Vec> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Takes the inner String if this value is a String, otherwise None. + pub fn take_string(self) -> Option { + match self { + Self::String(s) => Some(s), + _ => None, + } + } + + /// Takes the inner Integer if this value is an Integer, otherwise None. + pub fn take_integer(self) -> Option { + match self { + Self::Integer(i) => Some(i), + _ => None, + } + } + + /// Takes the inner Float if this value is a Float, otherwise None. + pub fn take_float(self) -> Option { + match self { + Self::Float(f) => Some(f), + _ => None, + } + } + + /// Takes the inner Boolean if this value is a Boolean, otherwise None. + pub fn take_boolean(self) -> Option { + match self { + Self::Boolean(b) => Some(b), + _ => None, + } + } + + /// Takes the inner Object if this value is an Object, otherwise None. + pub fn take_object(self) -> Option { + match self { + Self::Object(obj) => Some(obj), + _ => None, + } + } + + /// Takes the inner Array if this value is an Array, otherwise None. + pub fn take_array(self) -> Option> { + match self { + Self::Array(arr) => Some(arr), + _ => None, + } + } + + /// Returns true if the value is empty. + /// An empty value is an empty String, empty Object, empty Array, or Null. + pub fn is_empty(&self) -> bool { + match self { + Self::String(s) => s.is_empty(), + Self::Object(obj) => obj.is_empty(), + Self::Array(arr) => arr.is_empty(), + Self::Null => true, + _ => false, + } + } + + /// Returns the number of elements in this Value. + /// For objects this is the number of key-value pairs, for arrays it's the number of elements, + /// for strings it's the string length, and for other types it's 0. + pub fn len(&self) -> usize { + match self { + Self::String(s) => s.len(), + Self::Object(obj) => obj.len(), + Self::Array(arr) => arr.len(), + _ => 0, + } + } + + /// Get a reference to a value in an object by key. + /// Returns None if the value is not an object or if the key doesn't exist. + pub fn get(&self, key: &str) -> Option<&Value> { + match self { + Self::Object(obj) => obj.get(key), + _ => None, + } + } + + /// Get a reference to a value in an array by index. + /// Returns None if the value is not an array or if the index is out of bounds. + pub fn get_index(&self, index: usize) -> Option<&Value> { + match self { + Self::Array(arr) => arr.get(index), + _ => None, + } + } +} From fa63a507198d2ed57d0556e9311c12d8b1308614 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:28:22 +0100 Subject: [PATCH 09/26] Add a warning about missing docs --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 608a85f..dffeb35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +#![warn(missing_docs)] + #[cfg(any( feature = "lua51", feature = "lua52", From 72ef842cf5bba62ba9f2ecbd5e6c7bb01272fa46 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:36:30 +0100 Subject: [PATCH 10/26] Improve error implementation --- src/error.rs | 48 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/error.rs b/src/error.rs index 5a84080..6affee3 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,32 +1,56 @@ use std::fmt::Display; +/// A type alias for the standard Result type, defaulting to the crate's Error type. pub type Result = std::result::Result; +/// Represents all possible errors that can occur when parsing or processing Corn data. #[derive(Debug)] pub enum Error { + /// An I/O error that occurred during file operations. Io(std::io::Error), - // #[error("failed to resolve referenced input `{0}`")] - // InputResolveError(String), + /// Indicates an unexpected end of input during parsing. + Eof, - // #[error("attempted to use dot-notation on non-object value at `{0}`")] - // InvalidPathError(String), + /// Indicates that an unexpected token was encountered during parsing. + UnexpectedToken { + /// Description of what was expected at this position + expected: &'static str, + /// The actual byte value found + found: u8, + /// The position in the input where the unexpected token was found + index: usize, + }, - // #[error("attempted to spread a type that differs from its containing type at `{0}`")] - // InvalidSpreadError(String), - - // #[error("attempted to interpolate a non-string type into a string at `{0}`")] - // InvalidInterpolationError(String), - - // #[error("failed to deserialize input: {0}")] + /// This variant is typically used when handling serde-specific deserialization errors. DeserializationError(String), } +impl Error { + /// Creates a new UnexpectedToken error with the provided details. + pub const fn unexpected_token(expected: &'static str, found: u8, index: usize) -> Self { + Self::UnexpectedToken { + expected, + found, + index, + } + } +} + impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Io(e) => e.fmt(f), - Self::DeserializationError(s) => s.fmt(f), + Self::Eof => write!(f, "Unexpected end of input"), + Self::UnexpectedToken { + expected, + found, + index, + } => write!( + f, + "Unexpected Token {found} at index {index}, expected {expected}" + ), + Self::DeserializationError(s) => write!(f, "Failed to deserialize input: {s}"), } } } From dfe45261099859cc0fc0b83fde466b65f77ebcca Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:37:31 +0100 Subject: [PATCH 11/26] Include the readme as the top level doc --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index dffeb35..904c563 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ #![warn(missing_docs)] +#![doc = include_str!("../README.md")] #[cfg(any( feature = "lua51", From 95c88b51c0bba5b2a738e6b30f7c61f7acd09621 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:40:44 +0100 Subject: [PATCH 12/26] Avoid using a deprecated method for integer --- src/value/integer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/value/integer.rs b/src/value/integer.rs index 0d87df9..5d2063e 100644 --- a/src/value/integer.rs +++ b/src/value/integer.rs @@ -46,7 +46,7 @@ impl From for Integer { impl Integer { pub const fn is_i64(&self) -> bool { match self.inner { - IntegerType::Positive(n) => n <= i64::max_value() as u64, + IntegerType::Positive(n) => n <= i64::MAX as u64, IntegerType::Negative(_) => true, } } From d6fdd9908a4c86954f4c90469719516d765e6e22 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:42:45 +0100 Subject: [PATCH 13/26] Add myself to the authors --- Cargo.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2cc0366..60a1437 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,10 @@ description = "Parsing engine for Corn, a simple and pain-free configuration lan repository = "https://github.com/JakeStanger/corn" categories = ["config", "parsing"] keywords = ["configuration", "language", "wasm", "peg"] -authors = ["Jake Stanger "] +authors = [ + "Jake Stanger ", + "Matilde Morrone ", +] homepage = "https://cornlang.dev/" documentation = "https://docs.rs/libcorn" readme = "README.md" From ea9c6d23743ec3e1e18d664122eca2426b3c7246 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 14:45:14 +0100 Subject: [PATCH 14/26] Fix parser tests --- tests/parser_tests.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs index 7b6d8ff..073ef8a 100644 --- a/tests/parser_tests.rs +++ b/tests/parser_tests.rs @@ -1,6 +1,6 @@ extern crate core; -use corn::parse; +use corn::Value; use paste::paste; use std::fs; @@ -16,7 +16,7 @@ macro_rules! generate_eq_tests { let input = fs::read_to_string(format!("{root_dir}/assets/inputs/{test_name}.corn")).unwrap(); let valid = fs::read_to_string(format!("{root_dir}/assets/outputs/json/{test_name}.json")).unwrap().replace("\r", ""); - let config = parse(input.as_str()).unwrap(); + let config: Value = corn::from_str(input.as_str()).unwrap(); let serialized = serde_json::to_string_pretty(&config).unwrap().replace("\r", ""); assert_eq!(serialized.trim(), valid.trim()); @@ -30,7 +30,7 @@ macro_rules! generate_eq_tests { let input = fs::read_to_string(format!("{root_dir}/assets/inputs/{test_name}.corn")).unwrap(); let valid = fs::read_to_string(format!("{root_dir}/assets/outputs/yaml/{test_name}.yml")).unwrap().replace("\r", ""); - let config = parse(input.as_str()).unwrap(); + let config: Value = corn::from_str(input.as_str()).unwrap(); let serialized = serde_norway::to_string(&config).unwrap().replace("\r", ""); assert_eq!(serialized.trim(), valid.trim()); @@ -44,7 +44,7 @@ macro_rules! generate_eq_tests { let input = fs::read_to_string(format!("{root_dir}/assets/inputs/{test_name}.corn")).unwrap(); let valid = fs::read_to_string(format!("{root_dir}/assets/outputs/toml/{test_name}.toml")).unwrap().replace("\r", ""); - let config = parse(input.as_str()).unwrap(); + let config: Value = corn::from_str(input.as_str()).unwrap(); // fall back to default as toml can fail due to no null let serialized = toml_edit::ser::to_string_pretty(&config).unwrap_or_default().replace("\r", ""); @@ -66,7 +66,7 @@ macro_rules! generate_invalid_tests { let input = fs::read_to_string(format!("{root_dir}/assets/inputs/{}.corn", test_name)).unwrap(); - let config = parse(input.as_str()); + let config = corn::from_str::(input.as_str()); assert!(config.is_err()); } )+ From 408f1025972f4c5432f545139ec62af0d53a4523 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 17:19:14 +0100 Subject: [PATCH 15/26] Initial deserializer structure implementation --- src/de.rs | 345 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 233 insertions(+), 112 deletions(-) diff --git a/src/de.rs b/src/de.rs index d949003..29dee09 100644 --- a/src/de.rs +++ b/src/de.rs @@ -2,8 +2,9 @@ use std::collections::HashMap; use serde::de; -use crate::{Error, Value}; +use crate::{Error, Result, Value}; +/// A structure that deserializes Corn into Rust values. #[derive(Clone)] pub struct Deserializer<'de> { bytes: &'de [u8], @@ -11,12 +12,91 @@ pub struct Deserializer<'de> { variables: HashMap, } -impl Deserializer<'_> { - pub fn from_str(input: &str) -> Self { +impl<'de> Deserializer<'de> { + /// Refer to the `Deserializer::from_str` method for more info. + pub fn from_str(input: &'de str) -> Self { + Self { + bytes: input.as_bytes(), + index: 0, + variables: HashMap::new(), + } + } + + fn advance(&mut self) { + self.index += 1; + } + + fn next(&mut self) -> Result> { + let byte = self.peek()?; + + self.advance(); + + Ok(byte) + } + + fn peek(&mut self) -> Result> { + Ok(self.bytes.get(self.index).copied()) + } + + fn parse_whitespace(&mut self) -> Result> { + loop { + match self.peek()? { + Some(byte) if byte.is_ascii_whitespace() => { + self.advance(); + } + other => return Ok(other), + } + } + } + + fn whitespace_or_eof(&mut self) -> Result { + match self.parse_whitespace() { + Ok(Some(byte)) => Ok(byte), + Ok(None) => Err(Error::Eof), + Err(err) => Err(err), + } + } + + fn parse_key(&mut self) -> Result { todo!() } + + fn parse_ident(&mut self, ident: &[u8]) -> Result<()> { + for expected in ident { + match self.next()? { + None => { + return Err(Error::Eof); + } + Some(next) => { + if next != *expected { + return Err(Error::DeserializationError("No ident".to_string())); + } + } + } + } + + Ok(()) + } } +/// Deserializes a Corn-formatted string into a Rust type. +/// +/// # Example +/// +/// ``` +/// use corn::from_str; +/// +/// #[derive(serde::Deserialize)] +/// struct Config { +/// name: String, +/// version: u32, +/// } +/// +/// let corn_str = "{ name = \"My App\" version = 1 }"; +/// let config: Config = from_str(corn_str).unwrap(); +/// assert_eq!(config.name, "My App"); +/// assert_eq!(config.version, 1); +/// ``` pub fn from_str<'a, T>(s: &'a str) -> Result where T: de::Deserialize<'a>, @@ -33,14 +113,62 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'{' => { + self.advance(); + visitor.visit_map(MapAccess::new(self)) + } + b'[' => { + self.advance(); + visitor.visit_seq(SeqAccess::new(self)) + } + b'n' => { + self.parse_ident(b"null")?; + visitor.visit_unit() + } + b't' => { + self.parse_ident(b"true")?; + visitor.visit_bool(true) + } + b'f' => { + self.parse_ident(b"false")?; + visitor.visit_bool(false) + } + b'-' => { + unimplemented!("Negative number") + } + b'0'..=b'9' => { + unimplemented!("Number parsing") + } + b'"' => { + unimplemented!("String parsing") + } + token => Err(Error::unexpected_token( + "one of: ", // FIXME: include more info + token, self.index, + )), + } } fn deserialize_bool(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b't' => { + self.parse_ident(b"true")?; + visitor.visit_bool(true) + } + b'f' => { + self.parse_ident(b"false")?; + visitor.visit_bool(false) + } + token => Err(Error::unexpected_token( + "one of: true, false", + token, + self.index, + )), + } } fn deserialize_i8(self, visitor: V) -> Result @@ -131,7 +259,7 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + self.deserialize_str(visitor) } fn deserialize_bytes(self, visitor: V) -> Result @@ -226,7 +354,7 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + self.deserialize_map(visitor) } fn deserialize_enum( @@ -256,108 +384,101 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { } } -// impl Entry { -// pub const fn as_type(&self) -> &'static str { -// match self { -// Self::String(_) => todo!(), -// // Self::InterpolatedString(string_parts) => todo!(), -// Self::Integer(_) => todo!(), -// Self::Float(_) => todo!(), -// Self::Boolean(_) => todo!(), -// Self::Object(object_entry) => todo!(), -// Self::Array(array_entry) => todo!(), -// Self::Input(_) => todo!(), -// Self::Null => todo!(), -// } -// } -// } - -// #[derive(Debug, Clone)] -// pub enum ObjectEntry { -// Flat(IndexMap), -// WithSpreads(Vec), -// } - -// #[derive(Debug, Clone)] -// pub enum ObjectPart { -// Pair(String, Entry), -// Spread(String), -// } - -// #[derive(Debug, Clone)] -// pub enum ArrayEntry { -// Flat(Vec), -// WithSpreads(Vec), -// } - -// /// Part of an array with spreads -// #[derive(Debug, Clone)] -// pub enum ArrayPart { -// Entry(Entry), -// Spread(String), -// } - -// /// Helpers for the parser -// #[derive(Debug, Clone)] -// pub(crate) enum SpreadOr { -// Spread(String), -// Other(T), -// } - -// pub(crate) fn pairs_to_object(pairs: Vec>) -> ObjectEntry { -// let has_spreads = pairs.iter().any(|p| matches!(p, SpreadOr::Spread(_))); - -// if has_spreads { -// let parts: Vec = pairs -// .into_iter() -// .map(|p| match p { -// SpreadOr::Other((k, v)) => ObjectPart::Pair(k, v), -// SpreadOr::Spread(name) => ObjectPart::Spread(name), -// }) -// .collect(); -// ObjectEntry::WithSpreads(parts) -// } else { -// let map: IndexMap = pairs -// .into_iter() -// .filter_map(|p| match p { -// SpreadOr::Other((k, v)) => Some((k, v)), -// _ => None, // This should never happen if has_spreads is false -// }) -// .collect(); -// ObjectEntry::Flat(map) -// } -// } - -// pub(crate) fn entries_to_array(entries: Vec>) -> ArrayEntry { -// let has_spreads = entries.iter().any(|e| matches!(e, SpreadOr::Spread(_))); - -// if has_spreads { -// let parts: Vec = entries -// .into_iter() -// .map(|e| match e { -// SpreadOr::Other(v) => ArrayPart::Entry(v), -// SpreadOr::Spread(name) => ArrayPart::Spread(name), -// }) -// .collect(); -// ArrayEntry::WithSpreads(parts) -// } else { -// let values: Vec = entries -// .into_iter() -// .filter_map(|e| match e { -// SpreadOr::Other(v) => Some(v), -// _ => None, // This should never happen if has_spreads is false -// }) -// .collect(); -// ArrayEntry::Flat(values) -// } -// } - -// // pub(crate) fn create_nested_entry(keys: Vec, value: Entry) -> Entry { -// // let mut current = value; - -// // for key in keys.into_iter().rev() { -// // current = Entry::Object(ObjectEntry::Flat(indexmap! {key => current})); -// // } - -// // current -// // } +struct SeqAccess<'a, 'de: 'a> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> SeqAccess<'a, 'de> { + pub fn new(de: &'a mut Deserializer<'de>) -> Self { + Self { de } + } +} + +impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { + type Error = Error; + + fn next_element_seed( + &mut self, + seed: T, + ) -> std::result::Result, Self::Error> + where + T: de::DeserializeSeed<'de>, + { + todo!() + } +} + +struct MapAccess<'a, 'de: 'a> { + de: &'a mut Deserializer<'de>, +} + +impl<'a, 'de> MapAccess<'a, 'de> { + pub fn new(de: &'a mut Deserializer<'de>) -> Self { + Self { de } + } +} + +impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> std::result::Result, Self::Error> + where + K: de::DeserializeSeed<'de>, + { + println!("Calling key seed"); + + match self.de.whitespace_or_eof()? { + b'}' => { + self.de.advance(); + return Ok(None); + } + b'\'' => { + todo!() + } + token => { + println!("Got to token {token}"); + let start = self.de.index; + + loop { + match self.de.peek()? { + Some(byte) => { + if byte.is_ascii_whitespace() || matches!(byte, b'.' | b'=') { + break; + } + + self.de.advance(); + } + None => break, + } + } + + let end = self.de.index; + + if start == end { + // return Err(Error::EmptyKey); + } + + let key = std::str::from_utf8(&self.de.bytes[start..end]) + .map_err(|_| Error::InvalidUtf8)?; + + seed.deserialize(de::value::StrDeserializer::new(key)) + .map(Some) + } + } + } + + fn next_value_seed(&mut self, seed: V) -> std::result::Result + where + V: de::DeserializeSeed<'de>, + { + println!("Calling value seed"); + + match self.de.whitespace_or_eof()? { + b'=' => { + self.de.advance(); // Skip the equals sign + seed.deserialize(&mut *self.de) + } + token => Err(Error::unexpected_token("=", token, self.de.index)), + } + } +} From 78a4e20b9e0c4e165f8f75973ee06606cd4cb369 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 17:37:43 +0100 Subject: [PATCH 16/26] Implement null and booleans --- src/de.rs | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/de.rs b/src/de.rs index 29dee09..b996ade 100644 --- a/src/de.rs +++ b/src/de.rs @@ -114,6 +114,10 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { V: de::Visitor<'de>, { match self.whitespace_or_eof()? { + b'l' => { + self.parse_ident(b"let")?; + unimplemented!("Let block") + } b'{' => { self.advance(); visitor.visit_map(MapAccess::new(self)) @@ -280,14 +284,26 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.parse_whitespace()? { + Some(b'n') => { + self.parse_ident(b"null")?; + visitor.visit_none() + } + _ => visitor.visit_some(self), + } } fn deserialize_unit(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'n' => { + self.parse_ident(b"null")?; + visitor.visit_unit() + } + token => Err(Error::unexpected_token("null", token, self.index)), + } } fn deserialize_unit_struct( @@ -316,7 +332,13 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'[' => { + self.advance(); + visitor.visit_seq(SeqAccess::new(self)) + } + token => Err(Error::unexpected_token("[", token, self.index)), + } } fn deserialize_tuple(self, len: usize, visitor: V) -> Result @@ -342,7 +364,13 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'{' => { + self.advance(); + visitor.visit_map(MapAccess::new(self)) + } + token => Err(Error::unexpected_token("{", token, self.index)), + } } fn deserialize_struct( From 3f39f79bda818d1e6c15ffc3d0baaa753369ec67 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 18:19:18 +0100 Subject: [PATCH 17/26] Implement basic integer and let block parsing --- src/de.rs | 106 ++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 10 deletions(-) diff --git a/src/de.rs b/src/de.rs index b996ade..dd8dd25 100644 --- a/src/de.rs +++ b/src/de.rs @@ -14,12 +14,16 @@ pub struct Deserializer<'de> { impl<'de> Deserializer<'de> { /// Refer to the `Deserializer::from_str` method for more info. - pub fn from_str(input: &'de str) -> Self { - Self { + pub fn from_str(input: &'de str) -> Result { + let mut de = Self { bytes: input.as_bytes(), index: 0, variables: HashMap::new(), - } + }; + + de.parse_let_block()?; + + Ok(de) } fn advance(&mut self) { @@ -77,6 +81,82 @@ impl<'de> Deserializer<'de> { Ok(()) } + + fn parse_integer(&mut self, negative: bool) -> Result { + let next = self.whitespace_or_eof()?; + + match next { + c @ b'1'..=b'9' => { + let mut significand = (c - b'0') as i64; + + loop { + match self.next()? { + None => { + break Ok(if negative { + significand.wrapping_neg() + } else { + significand + }) + } + + Some(integer @ b'0'..=b'9') => { + let digit = (integer - b'0') as i64; + + significand = significand * 10 + digit; + } + + Some(token) => return Err(Error::unexpected_token("", token, self.index)), + } + } + } + + _ => todo!(), + } + } + + fn parse_let_block(&mut self) -> Result<()> { + match self.whitespace_or_eof()? { + b'{' => return Ok(()), + b'l' => { + self.parse_ident(b"let")?; + + match self.whitespace_or_eof()? { + b'{' => { + self.advance(); + loop { + match self.whitespace_or_eof()? { + b'$' => { + unimplemented!("key parsing") + } + b'}' => { + self.advance(); + break; + } + token => { + return Err(Error::unexpected_token( + "input definition or }", + token, + self.index, + )) + } + } + } + + match self.whitespace_or_eof()? { + b'i' => { + self.parse_ident(b"in")?; + } + token => return Err(Error::unexpected_token("in", token, self.index)), + } + } + token => return Err(Error::unexpected_token("{", token, self.index)), + } + } + token => return Err(Error::unexpected_token("one of: let, {", token, self.index)), + } + + Ok(()) + } } /// Deserializes a Corn-formatted string into a Rust type. @@ -101,7 +181,7 @@ pub fn from_str<'a, T>(s: &'a str) -> Result where T: de::Deserialize<'a>, { - let mut deserializer = Deserializer::from_str(s); + let mut deserializer = Deserializer::from_str(s)?; T::deserialize(&mut deserializer) } @@ -114,10 +194,6 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { V: de::Visitor<'de>, { match self.whitespace_or_eof()? { - b'l' => { - self.parse_ident(b"let")?; - unimplemented!("Let block") - } b'{' => { self.advance(); visitor.visit_map(MapAccess::new(self)) @@ -200,7 +276,14 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'-' => { + self.advance(); + visitor.visit_i64(self.parse_integer(true)?) + } + b'0'..=b'9' => visitor.visit_i64(self.parse_integer(false)?), + _ => todo!(), + } } fn deserialize_u8(self, visitor: V) -> Result @@ -432,7 +515,10 @@ impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { where T: de::DeserializeSeed<'de>, { - todo!() + match self.de.whitespace_or_eof()? { + b']' => Ok(None), + _ => seed.deserialize(&mut *self.de).map(Some), + } } } From 8980ba25fd37006b03eab811c0df2f1976d0f854 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Sat, 29 Mar 2025 18:21:20 +0100 Subject: [PATCH 18/26] Remove left over debug statements --- src/de.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/de.rs b/src/de.rs index dd8dd25..fa9d387 100644 --- a/src/de.rs +++ b/src/de.rs @@ -539,8 +539,6 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { where K: de::DeserializeSeed<'de>, { - println!("Calling key seed"); - match self.de.whitespace_or_eof()? { b'}' => { self.de.advance(); @@ -550,7 +548,6 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { todo!() } token => { - println!("Got to token {token}"); let start = self.de.index; loop { @@ -585,8 +582,6 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { where V: de::DeserializeSeed<'de>, { - println!("Calling value seed"); - match self.de.whitespace_or_eof()? { b'=' => { self.de.advance(); // Skip the equals sign From 44e7aed02add26c785c600421ced7ddc11ab1165 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:26:26 +0200 Subject: [PATCH 19/26] Add basic string parsing --- src/de.rs | 29 +++++++++++++++++++++++++++-- src/error.rs | 3 +++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/de.rs b/src/de.rs index fa9d387..d5ac1a2 100644 --- a/src/de.rs +++ b/src/de.rs @@ -339,7 +339,32 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + match self.whitespace_or_eof()? { + b'"' => { + self.advance(); + + let start = self.index; + + loop { + match self.next()? { + Some(byte) => { + if byte == b'"' { + break; + } + } + None => return Err(Error::Eof), + } + } + + let end = self.index; + + let string = + std::str::from_utf8(&self.bytes[start..end]).map_err(|_| Error::InvalidUtf8)?; + + visitor.visit_str(string) + } + _ => todo!(), + } } fn deserialize_string(self, visitor: V) -> Result @@ -547,7 +572,7 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { b'\'' => { todo!() } - token => { + _ => { let start = self.de.index; loop { diff --git a/src/error.rs b/src/error.rs index 6affee3..21034b0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -12,6 +12,8 @@ pub enum Error { /// Indicates an unexpected end of input during parsing. Eof, + InvalidUtf8, + /// Indicates that an unexpected token was encountered during parsing. UnexpectedToken { /// Description of what was expected at this position @@ -42,6 +44,7 @@ impl Display for Error { match self { Self::Io(e) => e.fmt(f), Self::Eof => write!(f, "Unexpected end of input"), + Self::InvalidUtf8 => todo!(), Self::UnexpectedToken { expected, found, From 90b95979e29876a91df982ea8941a0199b9cfee7 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:28:59 +0200 Subject: [PATCH 20/26] Move sequence and object parsing to the bottom --- src/de.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/de.rs b/src/de.rs index d5ac1a2..f80d4a9 100644 --- a/src/de.rs +++ b/src/de.rs @@ -194,14 +194,6 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { V: de::Visitor<'de>, { match self.whitespace_or_eof()? { - b'{' => { - self.advance(); - visitor.visit_map(MapAccess::new(self)) - } - b'[' => { - self.advance(); - visitor.visit_seq(SeqAccess::new(self)) - } b'n' => { self.parse_ident(b"null")?; visitor.visit_unit() @@ -223,6 +215,14 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { b'"' => { unimplemented!("String parsing") } + b'[' => { + self.advance(); + visitor.visit_seq(SeqAccess::new(self)) + } + b'{' => { + self.advance(); + visitor.visit_map(MapAccess::new(self)) + } token => Err(Error::unexpected_token( "one of: ", // FIXME: include more info token, self.index, From 4b127fb77d09a9fa135a3ad3ace7a6f5427422fd Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:33:25 +0200 Subject: [PATCH 21/26] Forward all number deserialization to i64 --- src/de.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/de.rs b/src/de.rs index f80d4a9..aefa3aa 100644 --- a/src/de.rs +++ b/src/de.rs @@ -186,6 +186,7 @@ where T::deserialize(&mut deserializer) } +// TODO: more specialized number parsing impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { type Error = Error; @@ -255,21 +256,21 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_i16(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_i32(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_i64(self, visitor: V) -> Result @@ -290,28 +291,28 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_u16(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_u32(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_u64(self, visitor: V) -> Result where V: de::Visitor<'de>, { - todo!() + self.deserialize_i64(visitor) } fn deserialize_f32(self, visitor: V) -> Result From 62033b474f67a8c539afe6ea5413427274de331b Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:38:35 +0200 Subject: [PATCH 22/26] Forward some more deserializer methods --- src/de.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/de.rs b/src/de.rs index aefa3aa..87cd2f7 100644 --- a/src/de.rs +++ b/src/de.rs @@ -187,6 +187,7 @@ where } // TODO: more specialized number parsing +// TODO: extract parsing logic from deserializer impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { type Error = Error; @@ -194,6 +195,7 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { + // TODO: evaluate if forwarding method is correct match self.whitespace_or_eof()? { b'n' => { self.parse_ident(b"null")?; @@ -207,15 +209,9 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { self.parse_ident(b"false")?; visitor.visit_bool(false) } - b'-' => { - unimplemented!("Negative number") - } - b'0'..=b'9' => { - unimplemented!("Number parsing") - } - b'"' => { - unimplemented!("String parsing") - } + b'-' => self.deserialize_i64(visitor), + b'0'..=b'9' => self.deserialize_u64(visitor), + b'"' => self.deserialize_str(visitor), b'[' => { self.advance(); visitor.visit_seq(SeqAccess::new(self)) From f07b8255285db8f6785b0f8944589118597653b4 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:52:54 +0200 Subject: [PATCH 23/26] Fix array and number parsing --- src/de.rs | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/de.rs b/src/de.rs index 87cd2f7..b833e62 100644 --- a/src/de.rs +++ b/src/de.rs @@ -84,6 +84,7 @@ impl<'de> Deserializer<'de> { fn parse_integer(&mut self, negative: bool) -> Result { let next = self.whitespace_or_eof()?; + self.advance(); match next { c @ b'1'..=b'9' => { @@ -91,13 +92,8 @@ impl<'de> Deserializer<'de> { loop { match self.next()? { - None => { - break Ok(if negative { - significand.wrapping_neg() - } else { - significand - }) - } + Some(token) if token.is_ascii_whitespace() => break, + None => break, Some(integer @ b'0'..=b'9') => { let digit = (integer - b'0') as i64; @@ -105,11 +101,18 @@ impl<'de> Deserializer<'de> { significand = significand * 10 + digit; } - Some(token) => return Err(Error::unexpected_token("", token, self.index)), + Some(token) => { + return Err(Error::unexpected_token("integer", token, self.index)) + } } } - } + return Ok(if negative { + significand.wrapping_neg() + } else { + significand + }); + } _ => todo!(), } } @@ -209,8 +212,11 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { self.parse_ident(b"false")?; visitor.visit_bool(false) } - b'-' => self.deserialize_i64(visitor), - b'0'..=b'9' => self.deserialize_u64(visitor), + b'-' => { + self.advance(); + visitor.visit_i64(self.parse_integer(true)?) + } + b'0'..=b'9' => visitor.visit_i64(self.parse_integer(false)?), b'"' => self.deserialize_str(visitor), b'[' => { self.advance(); @@ -221,8 +227,9 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { visitor.visit_map(MapAccess::new(self)) } token => Err(Error::unexpected_token( - "one of: ", // FIXME: include more info - token, self.index, + "one of: any", // FIXME: include more info + token, + self.index, )), } } @@ -538,7 +545,10 @@ impl<'a, 'de> de::SeqAccess<'de> for SeqAccess<'a, 'de> { T: de::DeserializeSeed<'de>, { match self.de.whitespace_or_eof()? { - b']' => Ok(None), + b']' => { + self.de.advance(); + return Ok(None); + } _ => seed.deserialize(&mut *self.de).map(Some), } } From b53d8df24f706738aec294e18ea8ffa551ede841 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Tue, 8 Apr 2025 16:56:47 +0200 Subject: [PATCH 24/26] Fix string parser including quote character --- src/de.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/de.rs b/src/de.rs index b833e62..72021c4 100644 --- a/src/de.rs +++ b/src/de.rs @@ -360,7 +360,7 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { } } - let end = self.index; + let end = self.index - 1; let string = std::str::from_utf8(&self.bytes[start..end]).map_err(|_| Error::InvalidUtf8)?; From 2a2026146067e12cdb11e0dd380e8aeea8d2cca2 Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 18 Apr 2025 12:31:08 +0200 Subject: [PATCH 25/26] Forward to deserialize any and include some positioning --- Cargo.lock | 1 + Cargo.toml | 1 + src/de.rs | 309 ++++++----------------------------------------------- 3 files changed, 37 insertions(+), 274 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 268861f..f65c704 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -410,6 +410,7 @@ dependencies = [ "criterion", "indexmap", "itoa", + "memchr", "mlua", "paste", "serde", diff --git a/Cargo.toml b/Cargo.toml index 60a1437..214e6a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ mlua = { version = "0.10.3", features = [ "macros", "serialize", ], optional = true } +memchr = "2.7.4" [dev-dependencies] wasm-bindgen-test = { version = "0.3.50" } diff --git a/src/de.rs b/src/de.rs index 72021c4..ba8134b 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use serde::de; +use serde::{de, forward_to_deserialize_any}; use crate::{Error, Result, Value}; @@ -12,6 +12,11 @@ pub struct Deserializer<'de> { variables: HashMap, } +pub struct Position { + pub line: usize, + pub column: usize, +} + impl<'de> Deserializer<'de> { /// Refer to the `Deserializer::from_str` method for more info. pub fn from_str(input: &'de str) -> Result { @@ -26,6 +31,18 @@ impl<'de> Deserializer<'de> { Ok(de) } + fn position(&self, i: usize) -> Position { + let start_of_line = match memchr::memrchr(b'\n', &self.bytes[..i]) { + Some(position) => position + 1, + None => 0, + }; + + Position { + line: 1 + memchr::memchr_iter(b'\n', &self.bytes[..start_of_line]).count(), + column: i - start_of_line, + } + } + fn advance(&mut self) { self.index += 1; } @@ -198,7 +215,6 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { where V: de::Visitor<'de>, { - // TODO: evaluate if forwarding method is correct match self.whitespace_or_eof()? { b'n' => { self.parse_ident(b"null")?; @@ -217,133 +233,6 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { visitor.visit_i64(self.parse_integer(true)?) } b'0'..=b'9' => visitor.visit_i64(self.parse_integer(false)?), - b'"' => self.deserialize_str(visitor), - b'[' => { - self.advance(); - visitor.visit_seq(SeqAccess::new(self)) - } - b'{' => { - self.advance(); - visitor.visit_map(MapAccess::new(self)) - } - token => Err(Error::unexpected_token( - "one of: any", // FIXME: include more info - token, - self.index, - )), - } - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { - b't' => { - self.parse_ident(b"true")?; - visitor.visit_bool(true) - } - b'f' => { - self.parse_ident(b"false")?; - visitor.visit_bool(false) - } - token => Err(Error::unexpected_token( - "one of: true, false", - token, - self.index, - )), - } - } - - fn deserialize_i8(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i16(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { - b'-' => { - self.advance(); - visitor.visit_i64(self.parse_integer(true)?) - } - b'0'..=b'9' => visitor.visit_i64(self.parse_integer(false)?), - _ => todo!(), - } - } - - fn deserialize_u8(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_u16(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_u32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_u64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_i64(visitor) - } - - fn deserialize_f32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_char(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { b'"' => { self.advance(); @@ -367,160 +256,26 @@ impl<'de> de::Deserializer<'de> for &mut Deserializer<'de> { visitor.visit_str(string) } - _ => todo!(), - } - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_str(visitor) - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.parse_whitespace()? { - Some(b'n') => { - self.parse_ident(b"null")?; - visitor.visit_none() - } - _ => visitor.visit_some(self), - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { - b'n' => { - self.parse_ident(b"null")?; - visitor.visit_unit() - } - token => Err(Error::unexpected_token("null", token, self.index)), - } - } - - fn deserialize_unit_struct( - self, - name: &'static str, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_newtype_struct( - self, - name: &'static str, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { b'[' => { self.advance(); visitor.visit_seq(SeqAccess::new(self)) } - token => Err(Error::unexpected_token("[", token, self.index)), - } - } - - fn deserialize_tuple(self, len: usize, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_tuple_struct( - self, - name: &'static str, - len: usize, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.whitespace_or_eof()? { b'{' => { self.advance(); visitor.visit_map(MapAccess::new(self)) } - token => Err(Error::unexpected_token("{", token, self.index)), + token => Err(Error::unexpected_token( + "one of: any", // FIXME: include more info + token, + self.index, + )), } } - fn deserialize_struct( - self, - name: &'static str, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_map(visitor) - } - - fn deserialize_enum( - self, - name: &'static str, - variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_identifier(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - todo!() + forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf option unit unit_struct newtype_struct seq tuple + tuple_struct map struct enum identifier ignored_any } } @@ -577,7 +332,7 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { return Ok(None); } b'\'' => { - todo!() + unimplemented!("escaped keys") } _ => { let start = self.de.index; @@ -585,10 +340,16 @@ impl<'a, 'de> de::MapAccess<'de> for MapAccess<'a, 'de> { loop { match self.de.peek()? { Some(byte) => { - if byte.is_ascii_whitespace() || matches!(byte, b'.' | b'=') { + if byte.is_ascii_whitespace() || matches!(byte, b'=' | b'}') { break; } + if byte == b'.' { + self.de.advance(); + + unimplemented!("chains") + } + self.de.advance(); } None => break, From ddb1a970dc8ebbd1d4ed552c0fffc94266c1686d Mon Sep 17 00:00:00 2001 From: Matilde Morrone Date: Fri, 18 Apr 2025 12:49:01 +0200 Subject: [PATCH 26/26] Implement initial input parsing --- src/de.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/src/de.rs b/src/de.rs index ba8134b..49eb33a 100644 --- a/src/de.rs +++ b/src/de.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use serde::{de, forward_to_deserialize_any}; +use serde::{de, forward_to_deserialize_any, Deserialize}; use crate::{Error, Result, Value}; @@ -146,7 +146,50 @@ impl<'de> Deserializer<'de> { loop { match self.whitespace_or_eof()? { b'$' => { - unimplemented!("key parsing") + self.advance(); + + let start = self.index; + + loop { + match self.peek()? { + Some(byte) => { + if byte.is_ascii_whitespace() { + break; + } + + // FIXME: check the input is alphanumeric + // FIXME: check the inputs starts with a letter + + self.advance(); + } + None => break, + } + } + + let end = self.index; + + if start == end { + // return Err(Error::EmptyKey); + } + + // TODO: better error handling + let input = std::str::from_utf8(&self.bytes[start..end]) + .map_err(|_| Error::InvalidUtf8)?; + + match self.whitespace_or_eof()? { + b'=' => { + self.advance(); // Skip the equals sign + + let value = Value::deserialize(&mut *self)?; + + self.variables.insert(input.to_owned(), value); + } + token => { + return Err(Error::unexpected_token( + "=", token, self.index, + )) + } + } } b'}' => { self.advance();