diff --git a/.asf.yaml b/.asf.yaml index a151b23..5879ab4 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -5,9 +5,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..2990175 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +root = true + +[*] +end_of_line = lf +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.toml] +indent_size = tab +tab_width = 2 diff --git a/.gitattributes b/.gitattributes index be3a21b..8d91de4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. 
@@ -13,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# # The default behavior, which overrides 'core.autocrlf', is to use Git's # built-in heuristics to determine whether a particular file is text or binary. diff --git a/.github/semantic.yml b/.github/semantic.yml new file mode 100644 index 0000000..e325733 --- /dev/null +++ b/.github/semantic.yml @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# The pull request's title should fulfill the following pattern: +# +# <type>[optional scope]: <description> +# +# ...
where valid types and scopes can be found below; for example: +# +# build(maven): One level down for native profile +# +# More about configurations on https://github.com/Ezard/semantic-prs#configuration + +enabled: true + +titleOnly: true + +types: + - feat + - fix + - docs + - style + - refactor + - perf + - test + - build + - ci + - chore + - revert + +targetUrl: https://github.com/apache/datasketches-rust/blob/main/.github/semantic.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f1308e1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,118 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: CI +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + +# Concurrency strategy: +# github.workflow: distinguish this workflow from others +# github.event_name: distinguish `push` event from `pull_request` event +# github.event.number: set to the number of the pull request if `pull_request` event +# github.run_id: otherwise, it's a `push` event, only cancel if we rerun the workflow +# +# Reference: +# https://docs.github.com/en/actions/using-jobs/using-concurrency +# https://docs.github.com/en/actions/learn-github-actions/contexts#github-context +concurrency: + group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.run_id }} + cancel-in-progress: true +jobs: + check: + name: Check + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v6 + - name: Delete rust-toolchain.toml + run: rm rust-toolchain.toml + - name: Install toolchain + uses: dtolnay/rust-toolchain@nightly + with: + components: rustfmt,clippy + - uses: Swatinem/rust-cache@v2 + - uses: taiki-e/install-action@v2 + with: + tool: typos-cli,taplo-cli,hawkeye + - name: Check all + run: | + hawkeye check + taplo format --check + typos + cargo +nightly fmt --all -- --check + cargo +nightly clippy --all-targets --all-features -- -D warnings + + msrv: + name: Resolve MSRV + runs-on: ubuntu-24.04 + outputs: + rust-versions: ${{ steps.metadata.outputs.rust-versions }} + steps: + - uses: actions/checkout@v6 + - id: metadata + run: | + msrv=$(yq '.package.rust-version' Cargo.toml) + echo "MSRV: $msrv" + echo "rust-versions=[\"${msrv}\", \"stable\"]" >> "$GITHUB_OUTPUT" + + test: + name: Run tests + needs: msrv + strategy: + matrix: + os: [ ubuntu-24.04, macos-14, windows-2022 ] + rust-version: ${{ fromJson(needs.msrv.outputs.rust-versions) }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v6 + - name: Delete rust-toolchain.toml + run: rm rust-toolchain.toml + - uses: Swatinem/rust-cache@v2 + - name: Install toolchain + uses: 
dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.rust-version }} + - name: Build + run: cargo build --workspace --all-features --bins --tests --examples --benches --lib + - name: Run unit tests + shell: bash + run: cargo test --all-features -- --nocapture + - name: Run examples + shell: bash + run: | + set -x + cargo run --example hll_usage + + required: + name: Required + runs-on: ubuntu-24.04 + if: ${{ always() }} + needs: + - check + - test + steps: + - name: Guardian + run: | + if [[ ! ( \ + "${{ needs.check.result }}" == "success" \ + && "${{ needs.test.result }}" == "success" \ + ) ]]; then + echo "Required jobs haven't been completed successfully." + exit -1 + fi diff --git a/.gitignore b/.gitignore index 4f47de0..36d5e44 100644 --- a/.gitignore +++ b/.gitignore @@ -1,85 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at # -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. +# http://www.apache.org/licenses/LICENSE-2.0 # +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. # Eclipse project files .classpath .project -.settings/ +.settings .checkstyle # IntelliJ project files -*.idea +.idea **/*.iml *.ipr *.iws # VSCode project files -**/.vscode/ - -# Additional tools -.clover/ +.vscode +!.vscode/settings.json # OSX files **/.DS_Store -# Compiler output, class files -*.class -bin/ - -# Log file -*.log - -# BlueJ files -*.ctxt - -# Mobile Tools for Java (J2ME) -.mtj.tmp/ - -# Package Files # -*.jar -*.war -*.ear -*.zip -*.tar.gz -*.rar - -# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml -hs_err_pid* - -#Test config and output -test-output/ -local/ -reports/ -.pmd -tmp/ - # Build artifacts -target/ -out/ -build/ -jarsIn/ -build.xml -*.properties -*.releaseBackup -*.next -*.tag -doc/ - -# Jekyll -_site/ -_* -_*/ \ No newline at end of file +**/target diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..a041789 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,43 @@ +# Contributing + +Thank you for contributing to Apache DataSketches! + +The goal of this document is to provide everything you need to start contributing to this core Rust library. + +## Your First Contribution + +1. [Fork the DataSketches repository](https://github.com/apache/datasketches-rust/fork) in your own GitHub account. +2. [Create a new Git branch](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-and-deleting-branches-within-your-repository). +3. Make your changes. +4. 
[Submit the branch as a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request-from-a-fork) to the upstream repo. A DataSketches team member should comment and/or review your pull request within a few days, although it may take longer depending on the circumstances. + +## Setup + +This repo develops the Apache® DataSketches™ Core Rust Library Component. To build this project, you will need to set up Rust development first. We highly recommend using [rustup](https://rustup.rs/) for the setup process. + +For Linux or macOS users, use the following command: + +```shell +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +``` + +For Windows users, download `rustup-init.exe` from [here](https://win.rustup.rs/x86_64) instead. + +Rustup will read the `rust-toolchain.toml` file and set up everything else automatically. To ensure that everything works correctly, run `cargo version` under the root directory: + +```shell +cargo version +# cargo 1.85.0 ( 2024-12-31) +``` + +To keep code style consistent, we use the following tools: + +* Nightly `rustfmt` for code formatting: `cargo +nightly fmt --all -- --check` +* Nightly `clippy` for linting: `cargo +nightly clippy --all-targets --all-features -- -D warnings` +* [`typos`](https://github.com/crate-ci/typos) for spell checking: `cargo install typos-cli` and then `typos` +* [`taplo`](https://taplo.tamasfe.dev/) for checking `toml` files: `cargo install taplo-cli` and then `taplo format --check` +* [`hawkeye`](https://github.com/korandoru/hawkeye) for checking license headers: `cargo install hawkeye` and then `hawkeye check` + +## Code of Conduct + +We expect all community members to follow our [Code of Conduct](https://www.apache.org/foundation/policies/conduct.html).
diff --git a/Cargo.toml b/Cargo.toml index b216201..f2c7829 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,21 @@ [package] name = "datasketches" version = "0.1.0" + edition = "2024" +rust-version = "1.85.0" + +categories = ["data-structures", "algorithms"] +description = "A software library of stochastic streaming algorithms (a.k.a. sketches)" +homepage = "https://datasketches.apache.org" +keywords = ["sketch", "hyperloglog", "probabilistic"] +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/apache/datasketches-rust" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] [dependencies] -mur3 = "0.1" +mur3 = { version = "0.1.0" } diff --git a/README.md b/README.md index 7675cae..c74aa91 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ under the License. --> -# Apache® DataSketches™ Core Rust Library Component +# Apache® DataSketches™ Core Rust Library Component > [!WARNING] > > This repository is under early development. Use it with caution! -This is the core Rust component of the DataSketches library. It contains a subset of the sketching algorithms and can be accessed directly from user applications. +This is the core Rust component of the DataSketches library. It contains a subset of the sketching algorithms and can be accessed directly from user applications. Note that we have parallel core library components for Java, C++, Python, and Go implementations of many of the same sketch algorithms: diff --git a/licenserc.toml b/licenserc.toml new file mode 100644 index 0000000..1937a22 --- /dev/null +++ b/licenserc.toml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +headerPath = "Apache-2.0-ASF.txt" + +includes = ['**/*.rs', '**/*.yml', '**/*.yaml', '**/*.toml'] diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..a260d8d --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[toolchain] +channel = "1.85.0" +components = ["rustfmt", "clippy", "rust-analyzer"] diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..887de93 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +edition = "2024" +reorder_imports = true diff --git a/src/hll/aux_map.rs b/src/hll/aux_map.rs index 520be6d..33e2fdb 100644 --- a/src/hll/aux_map.rs +++ b/src/hll/aux_map.rs @@ -101,7 +101,7 @@ impl AuxMap { let index = self.find(slot); match index { FindResult::Found(_) => { - // Invariant: Array4 always check existance before inserting + // Invariant: Array4 always check existence before inserting // a new value on the same slot. unreachable!("slot {} already exists in aux map", slot); } @@ -130,7 +130,7 @@ impl AuxMap { self.entries[idx] = pack_coupon(slot, value); } FindResult::Empty(_) => { - // Invariant: Array4 always check existance before replacing + // Invariant: Array4 always check existence before replacing // an old value on the same slot. unreachable!("slot {} not found in aux map", slot); } @@ -165,7 +165,7 @@ impl AuxMap { if probe == start { // Invariant: AuxMap::insert is responsible for - // growing the AuxMap when a new entry is is inserted + // growing the AuxMap when a new entry is inserted // causing the map to be full. unreachable!("AuxMap full; no empty slots"); } diff --git a/src/lib.rs b/src/lib.rs index 5b64fac..07ace55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,6 +23,7 @@ //! //! 
This library is divided into modules that constitute distinct groups of functionality. +#![cfg_attr(docsrs, feature(doc_cfg))] #![deny(missing_docs)] pub mod error; diff --git a/taplo.toml b/taplo.toml new file mode 100644 index 0000000..a657363 --- /dev/null +++ b/taplo.toml @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +include = ["Cargo.toml", "**/*.toml"] + +# More about configurations on https://taplo.tamasfe.dev/configuration/file.html diff --git a/typos.toml b/typos.toml new file mode 100644 index 0000000..c27efec --- /dev/null +++ b/typos.toml @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[default.extend-words] +"NUMER" = "NUMER" + +[files] +extend-exclude = []