Skip to content

Commit 3ba65c8

Browse files
committed
Merge branch 'main' into backtrack
2 parents 0f9aa72 + b23b903 commit 3ba65c8

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+1499
-1322
lines changed

.github/workflows/python-release-conda.yml

Lines changed: 0 additions & 129 deletions
This file was deleted.

.github/workflows/python-release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ jobs:
3737
platform: linux
3838
- os: windows
3939
ls: dir
40-
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10
40+
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10
4141
- os: windows
4242
ls: dir
4343
target: x86_64
@@ -54,7 +54,7 @@ jobs:
5454
# interpreter: 3.11 3.12
5555
- os: macos
5656
target: aarch64
57-
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.8 pypy3.9 pypy3.10
57+
interpreter: 3.9 3.10 3.11 3.12 3.13 pypy3.9 pypy3.10
5858
- os: ubuntu
5959
platform: linux
6060
target: i686

.github/workflows/python.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ jobs:
5555
steps:
5656
- name: Checkout repository
5757
uses: actions/checkout@v4
58+
5859

5960
- name: Install Rust
6061
uses: actions-rs/toolchain@v1
@@ -76,7 +77,7 @@ jobs:
7677

7778

7879
- name: Cache Cargo Registry
79-
uses: actions/cache@v1
80+
uses: actions/cache@v4
8081
with:
8182
path: ~/.cargo/registry
8283
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
@@ -107,7 +108,7 @@ jobs:
107108
uses: actions-rs/cargo@v1
108109
with:
109110
command: audit
110-
args: -D warnings -f ./bindings/python/Cargo.lock
111+
args: -D warnings -f ./bindings/python/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014
111112

112113
- name: Install
113114
working-directory: ./bindings/python

.github/workflows/rust.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
build:
1111
runs-on: ${{ matrix.os }}
1212
env:
13-
MACOSX_DEPLOYMENT_TARGET: 10.11
13+
MACOSX_DEPLOYMENT_TARGET: 10.12
1414
strategy:
1515
matrix:
1616
os: [ubuntu-latest, windows-latest, macOS-latest]
@@ -94,7 +94,7 @@ jobs:
9494
uses: actions-rs/cargo@v1
9595
with:
9696
command: audit
97-
args: -D warnings -f ./tokenizers/Cargo.lock
97+
args: -D warnings -f ./tokenizers/Cargo.lock --ignore RUSTSEC-2024-0436 --ignore RUSTSEC-2025-0014
9898

9999
# Verify that Readme.md is up to date.
100100
- name: Make sure, Readme generated from lib.rs matches actual Readme

.github/workflows/trufflehog.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,7 @@ jobs:
1212
with:
1313
fetch-depth: 0
1414
- name: Secret Scanning
15-
uses: trufflesecurity/trufflehog@main
15+
uses: trufflesecurity/trufflehog@853e1e8d249fd1e29d0fcc7280d29b03df3d643d
16+
with:
17+
# exclude buggy postgres detector that is causing false positives and not relevant to our codebase
18+
extra_args: --results=verified,unknown --exclude-detectors=postgres

bindings/node/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
33
edition = "2021"
44
name = "node"
5-
version = "0.21.0-dev.0"
5+
version = "0.21.2-dev.0"
66

77
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
88

bindings/node/lib/bindings/encoding.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ describe('Encoding', () => {
122122
expect(indexes).toEqual([3, 5])
123123
})
124124

125-
it('returns the corrent indexes with pair sequences', () => {
125+
it('returns the correct indexes with pair sequences', () => {
126126
expect(encodingDual.wordToTokens(3, 0)).toEqual([3, 5])
127127
expect(encodingDual.wordToTokens(3, 1)).toEqual([8, 9])
128128
})

bindings/node/yarn.lock

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,12 +1835,12 @@ __metadata:
18351835
linkType: hard
18361836

18371837
"brace-expansion@npm:^1.1.7":
1838-
version: 1.1.11
1839-
resolution: "brace-expansion@npm:1.1.11"
1838+
version: 1.1.12
1839+
resolution: "brace-expansion@npm:1.1.12"
18401840
dependencies:
18411841
balanced-match: ^1.0.0
18421842
concat-map: 0.0.1
1843-
checksum: faf34a7bb0c3fcf4b59c7808bc5d2a96a40988addf2e7e09dfbb67a2251800e0d14cd2bfc1aa79174f2f5095c54ff27f46fb1289fe2d77dac755b5eb3434cc07
1843+
checksum: 12cb6d6310629e3048cadb003e1aca4d8c9bb5c67c3c321bafdd7e7a50155de081f78ea3e0ed92ecc75a9015e784f301efc8132383132f4f7904ad1ac529c562
18441844
languageName: node
18451845
linkType: hard
18461846

bindings/python/Cargo.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "tokenizers-python"
3-
version = "0.21.0-dev.0"
3+
version = "0.21.2-dev.0"
44
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
55
edition = "2021"
66

@@ -14,17 +14,17 @@ serde = { version = "1.0", features = ["rc", "derive"] }
1414
serde_json = "1.0"
1515
libc = "0.2"
1616
env_logger = "0.11"
17-
pyo3 = { version = "0.23", features = ["abi3", "abi3-py39", "py-clone"] }
18-
numpy = "0.23"
17+
pyo3 = { version = "0.25", features = ["abi3", "abi3-py39", "py-clone"] }
18+
numpy = "0.25"
1919
ndarray = "0.16"
20-
itertools = "0.12"
20+
itertools = "0.14"
2121

2222
[dependencies.tokenizers]
2323
path = "../../tokenizers"
2424

2525
[dev-dependencies]
2626
tempfile = "3.10"
27-
pyo3 = { version = "0.23", features = ["auto-initialize"] }
27+
pyo3 = { version = "0.25", features = ["auto-initialize"] }
2828

2929
[features]
30-
defaut = ["pyo3/extension-module"]
30+
default = ["pyo3/extension-module"]

bindings/python/py_src/tokenizers/decoders/__init__.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class BPEDecoder(Decoder):
3333
3434
Args:
3535
suffix (:obj:`str`, `optional`, defaults to :obj:`</w>`):
36-
The suffix that was used to caracterize an end-of-word. This suffix will
36+
The suffix that was used to characterize an end-of-word. This suffix will
3737
be replaced by whitespaces during the decoding
3838
"""
3939
def __init__(self, suffix="</w>"):

0 commit comments

Comments
 (0)