From a9f23f8ca34914dc4bd5f314ac52ccf669def0fb Mon Sep 17 00:00:00 2001 From: "F. G. Dorais" Date: Mon, 24 Nov 2025 17:59:53 -0500 Subject: [PATCH 1/2] chore: remove deprecations --- UnicodeBasic.lean | 141 -------------------- UnicodeBasic/TableLookup.lean | 5 - UnicodeBasic/Types.lean | 243 ---------------------------------- UnicodeData/Basic.lean | 28 ---- 4 files changed, 417 deletions(-) diff --git a/UnicodeBasic.lean b/UnicodeBasic.lean index e587cefb4..e2d959cc8 100644 --- a/UnicodeBasic.lean +++ b/UnicodeBasic.lean @@ -114,32 +114,11 @@ where .Sk, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ps, .Sm, .Po, .Sm, .Cc] -set_option linter.deprecated false in -@[deprecated Unicode.getGC (since := "1.2.0")] -def getGeneralCategory (char : Char) : GeneralCategory := - .ofGC! (getGC char) - instance : Membership Char GC where mem cat char := getGC char ⊆ cat instance (char : Char) (cat : GC) : Decidable (char ∈ cat) := inferInstanceAs (Decidable (_ ⊆ _)) -set_option linter.deprecated false in -@[deprecated "use `char ∈ category` instead" (since := "1.2.0")] -def isInGeneralCategory (char : Char) (category : GeneralCategory) : Bool := - match category, getGeneralCategory char with - | ⟨major, none⟩, ⟨charMajor, _⟩ => major == charMajor - | ⟨_, some .casedLetter⟩, ⟨_, some .lowercaseLetter⟩ => true - | ⟨_, some .casedLetter⟩, ⟨_, some .titlecaseLetter⟩ => true - | ⟨_, some .casedLetter⟩, ⟨_, some .uppercaseLetter⟩ => true - | ⟨_, some .casedLetter⟩, _ => false - | ⟨_, some .groupPunctuation⟩, ⟨_, some .openPunctuation⟩ => true - | ⟨_, some .groupPunctuation⟩, ⟨_, some .closePunctuation⟩ => true - | ⟨_, some .groupPunctuation⟩, _ => false - | ⟨_, some .quotePunctuation⟩, ⟨_, some .initialPunctuation⟩ => true - | ⟨_, some .quotePunctuation⟩, ⟨_, some .finalPunctuation⟩ => true - | ⟨_, some .quotePunctuation⟩, _ => false - | cat, charCat => cat == charCat namespace GeneralCategory @@ -150,33 +129,21 @@ namespace GeneralCategory Unicode Property: `General_Category=L` -/ abbrev isLetter (char : Char) : Bool := char ∈ Unicode.GC.L -@[deprecated "c ∈ Unicode.GC.L" (since := "1.2.0")] -protected abbrev isL := isLetter - /-- Check if lowercase letter character (`Ll`) Unicode Property: `General_Category=Ll` -/ abbrev isLowercaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Ll -@[deprecated "c ∈ Unicode.GC.Ll" (since := "1.2.0")] -protected abbrev isLl := isLowercaseLetter - /-- Check if titlecase letter character (`Lt`) Unicode Property: `General_Category=Lt` -/ abbrev isTitlecaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Lt -@[deprecated "c ∈ Unicode.GC.Lt" (since := "1.2.0")] -protected abbrev isLt := isTitlecaseLetter - /-- Check if uppercase letter character (`Lu`) Unicode Property: `General_Category=Lu` -/ abbrev isUppercaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Lu -@[deprecated "c ∈ Unicode.GC.Lu" (since := "1.2.0")] -protected abbrev isLu := isUppercaseLetter - /-- Check if cased letter character (`LC`) This is a derived category (`L = Lu | Ll | Lt`). @@ -184,25 +151,16 @@ protected abbrev isLu := isUppercaseLetter Unicode Property: `General_Category=LC` -/ abbrev isCasedLetter (char : Char) : Bool := char ∈ Unicode.GC.LC -@[deprecated "c ∈ Unicode.GC.LC" (since := "1.2.0")] -protected abbrev isLC := isCasedLetter - /-- Check if modifier letter character (`Lm`) Unicode Property: `General_Category=Lm`-/ abbrev isModifierLetter (char : Char) : Bool := char ∈ Unicode.GC.Lm -@[deprecated "c ∈ Unicode.GC.Lm" (since := "1.2.0")] -protected abbrev isLm := isModifierLetter - /-- Check if other letter character (`Lo`) Unicode Property: `General_Category=Lo`-/ abbrev isOtherLetter (char : Char) : Bool := char ∈ Unicode.GC.Lo -@[deprecated "c ∈ Unicode.GC.Lo" (since := "1.2.0")] -protected abbrev isLo := isOtherLetter - /-- Check if mark character (`M`) This is a derived category (`M = Mn | Mc | Me`). @@ -210,33 +168,21 @@ protected abbrev isLo := isOtherLetter Unicode Property: `General_Category=M` -/ abbrev isMark (char : Char) : Bool := char ∈ Unicode.GC.M -@[deprecated "c ∈ Unicode.GC.M" (since := "1.2.0")] -protected abbrev isM := isMark - /-- Check if nonspacing combining mark character (`Mn`) Unicode Property: `General_Category=Mn` -/ abbrev isNonspacingMark (char : Char) : Bool := char ∈ Unicode.GC.Mn -@[deprecated "c ∈ Unicode.GC.Mn" (since := "1.2.0")] -protected abbrev isMn := isNonspacingMark - /-- Check if spacing combining mark character (`Mc`) Unicode Property: `General_Category=Mc` -/ abbrev isSpacingMark (char : Char) : Bool := char ∈ Unicode.GC.Mc -@[deprecated "c ∈ Unicode.GC.Mc" (since := "1.2.0")] -protected abbrev isMc := isSpacingMark - /-- Check if enclosing combining mark character (`Me`) Unicode Property: `General_Category=Me` -/ abbrev isEnclosingMark (char : Char) : Bool := char ∈ Unicode.GC.Me -@[deprecated "c ∈ Unicode.GC.Me" (since := "1.2.0")] -protected abbrev isMe := isEnclosingMark - /-- Check if number character (`N`) This is a derived category (`N = Nd | Nl | No`). @@ -244,33 +190,21 @@ protected abbrev isMe := isEnclosingMark Unicode Property: `General_Category=N` -/ abbrev isNumber (char : Char) : Bool := char ∈ Unicode.GC.N -@[deprecated "c ∈ Unicode.GC.N" (since := "1.2.0")] -protected abbrev isN := isNumber - /-- Check if decimal number character (`Nd`) Unicode Property: `General_Category=Nd` -/ abbrev isDecimalNumber (char : Char) : Bool := char ∈ Unicode.GC.Nd -@[deprecated "c ∈ Unicode.GC.Nd" (since := "1.2.0")] -protected abbrev isNd := isDecimalNumber - /-- Check if letter number character (`Nl`) Unicode Property: `General_Category=Nl` -/ abbrev isLetterNumber (char : Char) : Bool := char ∈ Unicode.GC.Nl -@[deprecated "c ∈ Unicode.GC.Nl" (since := "1.2.0")] -protected abbrev isNl := isLetterNumber - /-- Check if other number character (`No`) Unicode Property: `General_Category=No` -/ abbrev isOtherNumber (char : Char) : Bool := char ∈ Unicode.GC.No -@[deprecated "c ∈ Unicode.GC.No" (since := "1.2.0")] -protected abbrev isNo := isOtherNumber - /-- Check if punctuation character (`P`) This is a derived category (`P = Pc | Pd | Ps | Pe | Pi | Pf | Po`). @@ -278,25 +212,16 @@ protected abbrev isNo := isOtherNumber Unicode Property: `General_Category=P` -/ abbrev isPunctuation (char : Char) : Bool := char ∈ Unicode.GC.P -@[deprecated "c ∈ Unicode.GC.P" (since := "1.2.0")] -protected abbrev isP := isPunctuation - /-- Check if connector punctuation character (`Pc`) Unicode Property: `General_Category=Pc` -/ abbrev isConnectorPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pc -@[deprecated "c ∈ Unicode.GC.Pc" (since := "1.2.0")] -protected abbrev isPc := isConnectorPunctuation - /-- Check if dash punctuation character (`Pd`) Unicode Property: `General_Category=Pd` -/ abbrev isDashPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pd -@[deprecated "c ∈ Unicode.GC.Pd" (since := "1.2.0")] -protected abbrev isPd := isDashPunctuation - /-- Check if grouping punctuation character (`PG`) This is a derived category (`PG = Ps | Pe`). @@ -304,25 +229,16 @@ protected abbrev isPd := isDashPunctuation Unicode Property: `General_Category=PG` -/ abbrev isGroupPunctuation (char : Char) : Bool := char ∈ Unicode.GC.PG -@[deprecated "c ∈ Unicode.GC.PG" (since := "1.2.0")] -protected abbrev isPG := isGroupPunctuation - /-- Check if open punctuation character (`Ps`) Unicode Property: `General_Category=Ps` -/ abbrev isOpenPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Ps -@[deprecated "c ∈ Unicode.GC.Ps" (since := "1.2.0")] -protected abbrev isPs := isOpenPunctuation - /-- Check if close punctuation character (`Pe`) Unicode Property: `General_Category=Pe` -/ abbrev isClosePunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pe -@[deprecated "c ∈ Unicode.GC.Pe" (since := "1.2.0")] -protected abbrev isPe := isClosePunctuation - /-- Check if quoting punctuation character (`PQ`) This is a derived category (`PQ = Pi | Pf`). @@ -330,33 +246,21 @@ protected abbrev isPe := isClosePunctuation Unicode Property: `General_Category=PQ` -/ abbrev isQuotePunctuation (char : Char) : Bool := char ∈ Unicode.GC.PQ -@[deprecated "c ∈ Unicode.GC.PQ" (since := "1.2.0")] -protected abbrev isPQ := isQuotePunctuation - /-- Check if initial punctuation character (`Pi`) Unicode Property: `General_Category=Pi` -/ abbrev isInitialPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pi -@[deprecated "c ∈ Unicode.GC.Pi" (since := "1.2.0")] -protected abbrev isPi := isInitialPunctuation - /-- Check if initial punctuation character (`Pf`) Unicode Property: `General_Category=Pf` -/ abbrev isFinalPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pf -@[deprecated "c ∈ Unicode.GC.Pf" (since := "1.2.0")] -protected abbrev isPf := isFinalPunctuation - /-- Check if other punctuation character (`Po`) Unicode Property: `General_Category=Po` -/ abbrev isOtherPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Po -@[deprecated "c ∈ Unicode.GC.Po" (since := "1.2.0")] -protected abbrev isPo := isOtherPunctuation - /-- Check if symbol character (`S`) This is a derived category (`S = Sm | Sc | Sk | So`). @@ -364,41 +268,26 @@ protected abbrev isPo := isOtherPunctuation Unicode Property: `General_Category=S` -/ abbrev isSymbol (char : Char) : Bool := char ∈ Unicode.GC.S -@[deprecated "c ∈ Unicode.GC.S" (since := "1.2.0")] -protected abbrev isS := isSymbol - /-- Check if math symbol character (`Sm`) Unicode Property: `General_Category=Sm` -/ abbrev isMathSymbol (char : Char) : Bool := char ∈ Unicode.GC.Sm -@[deprecated "c ∈ Unicode.GC.Sm" (since := "1.2.0")] -protected abbrev isSm := isMathSymbol - /-- Check if currency symbol character (`Sc`) Unicode Property: `General_Category=Sc` -/ abbrev isCurrencySymbol (char : Char) : Bool := char ∈ Unicode.GC.Sc -@[deprecated "c ∈ Unicode.GC.Sc" (since := "1.2.0")] -protected abbrev isSc := isCurrencySymbol - /-- Check if modifier symbol character (`Sk`) Unicode Property: `General_Category=Sk` -/ abbrev isModifierSymbol (char : Char) : Bool := char ∈ Unicode.GC.Sk -@[deprecated "c ∈ Unicode.GC.Sk" (since := "1.2.0")] -protected abbrev isSk := isModifierSymbol - /-- Check if other symbol character (`So`) Unicode Property: `General_Category=So` -/ abbrev isOtherSymbol (char : Char) : Bool := char ∈ Unicode.GC.So -@[deprecated "c ∈ Unicode.GC.So" (since := "1.2.0")] -protected abbrev isSo := isOtherSymbol - /-- Check if separator character (`Z`) This is a derived property (`Z = Zs | Zl | Zp`). @@ -406,33 +295,21 @@ protected abbrev isSo := isOtherSymbol Unicode Property: `General_Category=Z` -/ abbrev isSeparator (char : Char) : Bool := char ∈ Unicode.GC.Z -@[deprecated "c ∈ Unicode.GC.Z" (since := "1.2.0")] -protected abbrev isZ := isSeparator - /-- Check if space separator character (`Zs`) Unicode Property: `General_Category=Zs` -/ abbrev isSpaceSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zs -@[deprecated "c ∈ Unicode.GC.Zs" (since := "1.2.0")] -protected abbrev isZs := isSpaceSeparator - /-- Check if line separator character (`Zl`) Unicode Property: `General_Category=Zl` -/ abbrev isLineSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zl -@[deprecated "c ∈ Unicode.GC.Zl" (since := "1.2.0")] -protected abbrev isZl := isLineSeparator - /-- Check if paragraph separator character (`Zp`) Unicode Property: `General_Category=Zp` -/ abbrev isParagraphSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zp -@[deprecated "c ∈ Unicode.GC.Zp" (since := "1.2.0")] -protected abbrev isZp := isParagraphSeparator - /-- Check if other character (`C`) This is a derived category (`C = Cc | Cf | Cs | Co | Cn`). @@ -440,25 +317,16 @@ protected abbrev isZp := isParagraphSeparator Unicode Property: `General_Category=C` -/ abbrev isOther (char : Char) : Bool := char ∈ Unicode.GC.C -@[deprecated "c ∈ Unicode.GC.C" (since := "1.2.0")] -protected abbrev isC := isOther - /-- Check if control character (`Cc`) Unicode Property: `General_Category=Cc` -/ abbrev isControl (char : Char) : Bool := char ∈ Unicode.GC.Cc -@[deprecated "c ∈ Unicode.GC.Cc" (since := "1.2.0")] -protected abbrev isCc := isControl - /-- Check if format character (`Cf`) Unicode Property: `General_Category=Cf` -/ abbrev isFormat (char : Char) : Bool := char ∈ Unicode.GC.Cf -@[deprecated "c ∈ Unicode.GC.Cf" (since := "1.2.0")] -protected abbrev isCf := isFormat - /-- Check if surrogate character (`Cs`) Does not actually occur since Lean does not regard surrogate code points as characters. @@ -466,25 +334,16 @@ protected abbrev isCf := isFormat Unicode Property: `General_Category=Cs` -/ abbrev isSurrogate (char : Char) : Bool := char ∈ Unicode.GC.Cs -@[deprecated "c ∈ Unicode.GC.Cs" (since := "1.2.0")] -protected abbrev isCs := isSurrogate - /-- Check if private use character (`Co`) Unicode Property: `General_Category=Co` -/ abbrev isPrivateUse (char : Char) : Bool := char ∈ Unicode.GC.Co -@[deprecated "c ∈ Unicode.GC.Co" (since := "1.2.0")] -protected abbrev isCo := isPrivateUse - /-- Check if unassigned character (`Cn`) Unicode Property: `General_Category=Cn` -/ abbrev isUnassigned (char : Char) : Bool := char ∈ Unicode.GC.Cn -@[deprecated "c ∈ Unicode.GC.Cn" (since := "1.2.0")] -protected abbrev isCn := isUnassigned - end GeneralCategory /-! diff --git a/UnicodeBasic/TableLookup.lean b/UnicodeBasic/TableLookup.lean index 14c3dc597..bf66cc859 100644 --- a/UnicodeBasic/TableLookup.lean +++ b/UnicodeBasic/TableLookup.lean @@ -180,11 +180,6 @@ where @[inline] def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32 -set_option linter.deprecated false in -@[inline, deprecated Unicode.lookupGC (since := "v1.3.0")] -def lookupGeneralCategory (c : UInt32) : GeneralCategory := - .ofGC! (lookupGC c) - /-- Get name of a code point using lookup table Unicode property: `Name` -/ diff --git a/UnicodeBasic/Types.lean b/UnicodeBasic/Types.lean index 5dff2605a..f3b863de4 100644 --- a/UnicodeBasic/Types.lean +++ b/UnicodeBasic/Types.lean @@ -467,249 +467,6 @@ def ofString! (s : String.Slice) : GC := end GC -set_option linter.deprecated false in -@[deprecated Unicode.GC (since := "1.2.0")] -structure GeneralCategory : Type where - /-- Major general category of a code point -/ - major : MajorGeneralCategory - /-- Minor general category of a code point -/ - minor : Option (MinorGeneralCategory major) -deriving Inhabited, DecidableEq - -set_option linter.deprecated false in section - -/-- General category: letter (`L`) -/ -@[deprecated Unicode.GC.L (since := "1.2.0")] -protected def GeneralCategory.L : GeneralCategory := ⟨.letter, none⟩ -/-- General category: cased letter (`LC`) -/ -@[deprecated Unicode.GC.LC (since := "1.2.0")] -protected def GeneralCategory.LC : GeneralCategory := ⟨_, some .casedLetter⟩ -/-- General category: uppercase letter (`Lu`) -/ -@[deprecated Unicode.GC.Lu (since := "1.2.0")] -protected def GeneralCategory.Lu : GeneralCategory := ⟨_, some .uppercaseLetter⟩ -/-- General category: lowercase letter (`Ll`) -/ -@[deprecated Unicode.GC.Ll (since := "1.2.0")] -protected def GeneralCategory.Ll : GeneralCategory := ⟨_, some .lowercaseLetter⟩ -/-- General category: titlecase letter (`Lt`) -/ -@[deprecated Unicode.GC.Lt (since := "1.2.0")] -protected def GeneralCategory.Lt : GeneralCategory := ⟨_, some .titlecaseLetter⟩ -/-- General category: modifier letter (`Lm`) -/ -@[deprecated Unicode.GC.Lm (since := "1.2.0")] -protected def GeneralCategory.Lm : GeneralCategory := ⟨_, some .modifierLetter⟩ -/-- General category: other letter (`Lo`) -/ -@[deprecated Unicode.GC.Lo (since := "1.2.0")] -protected def GeneralCategory.Lo : GeneralCategory := ⟨_, some .otherLetter⟩ -/-- General category mark (`M`) -/ -@[deprecated Unicode.GC.M (since := "1.2.0")] -protected def GeneralCategory.M : GeneralCategory := ⟨.mark, none⟩ -/-- General category: nonspacing combining mark (`Mn`) -/ -@[deprecated Unicode.GC.Mn (since := "1.2.0")] -protected def GeneralCategory.Mn : GeneralCategory := ⟨_, some .nonspacingMark⟩ -/-- General category: spacing combining mark (`Mc`) -/ -@[deprecated Unicode.GC.Mc (since := "1.2.0")] -protected def GeneralCategory.Mc : GeneralCategory := ⟨_, some .spacingMark⟩ -/-- General category: enclosing combining mark (`Me`) -/ -@[deprecated Unicode.GC.Me (since := "1.2.0")] -protected def GeneralCategory.Me : GeneralCategory := ⟨_, some .enclosingMark⟩ -/-- General category: number (`N`) -/ -@[deprecated Unicode.GC.N (since := "1.2.0")] -protected def GeneralCategory.N : GeneralCategory := ⟨.number, none⟩ -/-- General category: decimal digit (`Nd`) -/ -@[deprecated Unicode.GC.Nd (since := "1.2.0")] -protected def GeneralCategory.Nd : GeneralCategory := ⟨_, some .decimalNumber⟩ -/-- General category: letter number (`Nl`) -/ -@[deprecated Unicode.GC.Nl (since := "1.2.0")] -protected def GeneralCategory.Nl : GeneralCategory := ⟨_, some .letterNumber⟩ -/-- General category: other number (`No`) -/ -@[deprecated Unicode.GC.No (since := "1.2.0")] -protected def GeneralCategory.No : GeneralCategory := ⟨_, some .otherNumber⟩ -/-- General category: punctuation (`P`) -/ -@[deprecated Unicode.GC.P (since := "1.2.0")] -protected def GeneralCategory.P : GeneralCategory := ⟨.punctuation, none⟩ -/-- General category: connector punctuation (`Pc`) -/ -@[deprecated Unicode.GC.Pc (since := "1.2.0")] -protected def GeneralCategory.Pc : GeneralCategory := ⟨_, some .connectorPunctuation⟩ -/-- General category: dash punctuation (`Pd`) -/ -@[deprecated Unicode.GC.Pd (since := "1.2.0")] -protected def GeneralCategory.Pd : GeneralCategory := ⟨_, some .dashPunctuation⟩ -/-- General category: grouping punctuation (`PG`) -/ -@[deprecated Unicode.GC.PG (since := "1.2.0")] -protected def GeneralCategory.PG : GeneralCategory := ⟨_, some .groupPunctuation⟩ -/-- General category: opening punctuation (`Ps`) -/ -@[deprecated Unicode.GC.Ps (since := "1.2.0")] -protected def GeneralCategory.Ps : GeneralCategory := ⟨_, some .openPunctuation⟩ -/-- General category: closing punctuation (`Pe`) -/ -@[deprecated Unicode.GC.Pe (since := "1.2.0")] -protected def GeneralCategory.Pe : GeneralCategory := ⟨_, some .closePunctuation⟩ -/-- General category: quoting punctuation (`PQ`) -/ -@[deprecated Unicode.GC.PQ (since := "1.2.0")] -protected def GeneralCategory.PQ : GeneralCategory := ⟨_, some .quotePunctuation⟩ -/-- General category: initial punctuation (`Pi`) -/ -@[deprecated Unicode.GC.Pi (since := "1.2.0")] -protected def GeneralCategory.Pi : GeneralCategory := ⟨_, some .initialPunctuation⟩ -/-- General category: final punctuation (`Pf`) -/ -@[deprecated Unicode.GC.Pf (since := "1.2.0")] -protected def GeneralCategory.Pf : GeneralCategory := ⟨_, some .finalPunctuation⟩ -/-- General category: other punctuation (`Po`) -/ -@[deprecated Unicode.GC.Po (since := "1.2.0")] -protected def GeneralCategory.Po : GeneralCategory := ⟨_, some .otherPunctuation⟩ -/-- General category: symbol (`S`) -/ -@[deprecated Unicode.GC.S (since := "1.2.0")] -protected def GeneralCategory.S : GeneralCategory := ⟨.symbol, none⟩ -/-- General category: math symbol (`Sm`) -/ -@[deprecated Unicode.GC.Sm (since := "1.2.0")] -protected def GeneralCategory.Sm : GeneralCategory := ⟨_, some .mathSymbol⟩ -/-- General category: currency symbol (`Sc`) -/ -@[deprecated Unicode.GC.Sc (since := "1.2.0")] -protected def GeneralCategory.Sc : GeneralCategory := ⟨_, some .currencySymbol⟩ -/-- General category: modifier symbol (`Sk`) -/ -@[deprecated Unicode.GC.Sk (since := "1.2.0")] -protected def GeneralCategory.Sk : GeneralCategory := ⟨_, some .modifierSymbol⟩ -/-- General category: other symbol (`So`) -/ -@[deprecated Unicode.GC.So (since := "1.2.0")] -protected def GeneralCategory.So : GeneralCategory := ⟨_, some .otherSymbol⟩ -/-- General category: separator (`Z`) -/ -@[deprecated Unicode.GC.Z (since := "1.2.0")] -protected def GeneralCategory.Z : GeneralCategory := ⟨.separator, none⟩ -/-- General category: space separator (`Zs`) -/ -@[deprecated Unicode.GC.Zs (since := "1.2.0")] -protected def GeneralCategory.Zs : GeneralCategory := ⟨_, some .spaceSeparator⟩ -/-- General category: line separator (`Zl`) -/ -@[deprecated Unicode.GC.Zl (since := "1.2.0")] -protected def GeneralCategory.Zl : GeneralCategory := ⟨_, some .lineSeparator⟩ -/-- General category: paragraph separator (`Zp`) -/ -@[deprecated Unicode.GC.Zp (since := "1.2.0")] -protected def GeneralCategory.Zp : GeneralCategory := ⟨_, some .paragraphSeparator⟩ -/-- General category: other (`C`) -/ -@[deprecated Unicode.GC.C (since := "1.2.0")] -protected def GeneralCategory.C : GeneralCategory := ⟨.other, none⟩ -/-- General category: control (`Cc`) -/ -@[deprecated Unicode.GC.Cc (since := "1.2.0")] -protected def GeneralCategory.Cc : GeneralCategory := ⟨_, some .control⟩ -/-- General category: format (`Cf`) -/ -@[deprecated Unicode.GC.Cf (since := "1.2.0")] -protected def GeneralCategory.Cf : GeneralCategory := ⟨_, some .format⟩ -/-- General category: surrogate (`Cs`) -/ -@[deprecated Unicode.GC.Cs (since := "1.2.0")] -protected def GeneralCategory.Cs : GeneralCategory := ⟨_, some .surrogate⟩ -/-- General category: private use (`Co`) -/ -@[deprecated Unicode.GC.Co (since := "1.2.0")] -protected def GeneralCategory.Co : GeneralCategory := ⟨_, some .privateUse⟩ -/-- General category: unassigned (`Cn`) -/ -@[deprecated Unicode.GC.Cn (since := "1.2.0")] -protected def GeneralCategory.Cn : GeneralCategory := ⟨_, some .unassigned⟩ - -def GeneralCategory.toGC : GeneralCategory → GC -| ⟨.letter, none⟩ => .L -| ⟨_, some .casedLetter⟩ => .LC -| ⟨_, some .uppercaseLetter⟩ => .Lu -| ⟨_, some .lowercaseLetter⟩ => .Ll -| ⟨_, some .titlecaseLetter⟩ => .Lt -| ⟨_, some .modifierLetter⟩ => .Lm -| ⟨_, some .otherLetter⟩ => .Lo -| ⟨.mark, none⟩ => .M -| ⟨_, some .nonspacingMark⟩ => .Mn -| ⟨_, some .spacingMark⟩ => .Mc -| ⟨_, some .enclosingMark⟩ => .Me -| ⟨.number, none⟩ => .N -| ⟨_, some .decimalNumber⟩ => .Nd -| ⟨_, some .letterNumber⟩ => .Nl -| ⟨_, some .otherNumber⟩ => .No -| ⟨.punctuation, none⟩ => .P -| ⟨_, some .connectorPunctuation⟩ => .Pc -| ⟨_, some .dashPunctuation⟩ => .Pd -| ⟨_, some .groupPunctuation⟩ => .PG -| ⟨_, some .openPunctuation⟩ => .Ps -| ⟨_, some .closePunctuation⟩ => .Pe -| ⟨_, some .quotePunctuation⟩ => .PQ -| ⟨_, some .initialPunctuation⟩ => .Pi -| ⟨_, some .finalPunctuation⟩ => .Pf -| ⟨_, some .otherPunctuation⟩ => .Po -| ⟨.symbol, none⟩ => .S -| ⟨_, some .mathSymbol⟩ => .Sm -| ⟨_, some .currencySymbol⟩ => .Sc -| ⟨_, some .modifierSymbol⟩ => .Sk -| ⟨_, some .otherSymbol⟩ => .So -| ⟨.separator, none⟩ => .Z -| ⟨_, some .spaceSeparator⟩ => .Zs -| ⟨_, some .lineSeparator⟩ => .Zl -| ⟨_, some .paragraphSeparator⟩ => .Zp -| ⟨.other, none⟩ => .C -| ⟨_, some .control⟩ => .Cc -| ⟨_, some .format⟩ => .Cf -| ⟨_, some .surrogate⟩ => .Cs -| ⟨_, some .privateUse⟩ => .Co -| ⟨_, some .unassigned⟩ => .Cn - -@[deprecated some (since := "1.2.0")] -def GeneralCategory.ofGC? (c : GC) : Option GeneralCategory := - if c == .C then some .C else - if c == .Cc then some .Cc else - if c == .Cf then some .Cf else - if c == .Cs then some .Cs else - if c == .Co then some .Co else - if c == .Cn then some .Cn else - if c == .L then some .L else - if c == .LC then some .LC else - if c == .Lu then some .Lu else - if c == .Ll then some .Ll else - if c == .Lt then some .Lt else - if c == .Lm then some .Lm else - if c == .Lo then some .Lo else - if c == .M then some .M else - if c == .Mn then some .Mn else - if c == .Mc then some .Mc else - if c == .Me then some .Me else - if c == .N then some .N else - if c == .Nd then some .Nd else - if c == .Nl then some .Nl else - if c == .No then some .No else - if c == .P then some .P else - if c == .PG then some .PG else - if c == .PQ then some .PQ else - if c == .Pc then some .Pc else - if c == .Pd then some .Pd else - if c == .Ps then some .Ps else - if c == .Pe then some .Pe else - if c == .Pi then some .Pi else - if c == .Pf then some .Pf else - if c == .Po then some .Po else - if c == .S then some .S else - if c == .Sm then some .Sm else - if c == .Sc then some .Sc else - if c == .Sk then some .Sk else - if c == .So then some .So else - if c == .Z then some .Z else - if c == .Zs then some .Zs else - if c == .Zl then some .Zl else - if c == .Zp then some .Zp else - none - -@[deprecated id (since := "1.2.0")] -def GeneralCategory.ofGC! (c : GC) : GeneralCategory := - (ofGC? c).get! - -/-- String abbreviation for general category -/ -@[deprecated Unicode.GC.toAbbrev! (since := "1.2.0")] -def GeneralCategory.toAbbrev (c : GeneralCategory) : String := - c.toGC.toAbbrev! - -/-- Get general category from string abbreviation -/ -@[deprecated Unicode.GC.ofAbbrev? (since := "1.2.0")] -def GeneralCategory.ofAbbrev? (s : String.Slice) : Option GeneralCategory := - GC.ofAbbrev? s >>= ofGC? - -@[deprecated Unicode.GC.ofAbbrev! (since := "1.2.0"), inherit_doc GeneralCategory.ofAbbrev?] -def GeneralCategory.ofAbbrev! (s : String.Slice) : GeneralCategory := - match ofAbbrev? s with - | some gc => gc - | none => panic! "invalid general category abbreviation" - -instance : Repr GeneralCategory where - reprPrec gc _ := s!"Unicode.GeneralCategory.{gc.toAbbrev}" - -end - /-! ## Numeric Type and Value ## -/ diff --git a/UnicodeData/Basic.lean b/UnicodeData/Basic.lean index 301079d02..20b06e55f 100644 --- a/UnicodeData/Basic.lean +++ b/UnicodeData/Basic.lean @@ -49,34 +49,6 @@ structure UnicodeData where titlecase : Option Char := none deriving BEq -@[deprecated UnicodeData.code (since := "1.2.0")] -abbrev UnicodeData.codeValue := @UnicodeData.code - -@[deprecated UnicodeData.name (since := "1.2.0")] -abbrev UnicodeData.characterName := @UnicodeData.name - -set_option linter.deprecated false in -@[deprecated UnicodeData.gc (since := "1.2.0")] -def UnicodeData.generalCategory (d : UnicodeData) : GeneralCategory := .ofGC! d.gc - -@[deprecated UnicodeData.bidi (since := "1.2.0")] -abbrev UnicodeData.bidiClass := @UnicodeData.bidi - -@[deprecated UnicodeData.cc (since := "1.2.0")] -abbrev UnicodeData.canonicalCombiningClass := @UnicodeData.cc - -@[deprecated UnicodeData.cc (since := "1.2.0")] -abbrev UnicodeData.decompositionMapping := @UnicodeData.decomp - -@[deprecated UnicodeData.lowercase (since := "1.2.0")] -abbrev UnicodeData.lowercaseMapping := @UnicodeData.lowercase - -@[deprecated UnicodeData.uppercase (since := "1.2.0")] -abbrev UnicodeData.uppercaseMapping := @UnicodeData.uppercase - -@[deprecated UnicodeData.titlecase (since := "1.2.0")] -abbrev UnicodeData.titlecaseMapping := @UnicodeData.titlecase - instance : Inhabited UnicodeData where default := { code := 0 From 602a249b9ed84b69186297021e6cbd1472e5e590 Mon Sep 17 00:00:00 2001 From: "F. G. Dorais" Date: Tue, 25 Nov 2025 01:04:42 -0500 Subject: [PATCH 2/2] chore: use modules --- UnicodeBasic.lean | 8 +- UnicodeBasic/CharacterDatabase.lean | 24 +++--- UnicodeBasic/Hangul.lean | 51 ++++++----- UnicodeBasic/TableLookup.lean | 49 ++++++----- UnicodeBasic/Types.lean | 128 ++++++++++++++-------------- UnicodeData.lean | 5 +- UnicodeData/Basic.lean | 32 +++---- UnicodeData/PropList.lean | 22 ++--- lakefile.lean | 7 +- makeCLib.lean | 5 ++ testTables.lean | 10 ++- 11 files changed, 177 insertions(+), 164 deletions(-) diff --git a/UnicodeBasic.lean b/UnicodeBasic.lean index e2d959cc8..4c73a1636 100644 --- a/UnicodeBasic.lean +++ b/UnicodeBasic.lean @@ -3,8 +3,10 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ -import UnicodeBasic.Types -import UnicodeBasic.TableLookup +module + +public import UnicodeBasic.TableLookup +public import UnicodeBasic.Types /-! # General API # @@ -40,7 +42,7 @@ import UnicodeBasic.TableLookup non-titlecase letter or a separator. -/ -namespace Unicode +public section namespace Unicode /-! ## Name ## diff --git a/UnicodeBasic/CharacterDatabase.lean b/UnicodeBasic/CharacterDatabase.lean index 730ce2066..42d0524e0 100644 --- a/UnicodeBasic/CharacterDatabase.lean +++ b/UnicodeBasic/CharacterDatabase.lean @@ -3,6 +3,8 @@ Copyright © 2023 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ +module + namespace Unicode /-- Stream type for Unicode Character Database (UCD) files @@ -12,7 +14,7 @@ namespace Unicode around field values are not significant. Line comments are prefixed with a number sign `#` (U+0023). -/ -structure UCDStream extends String.Slice where +public structure UCDStream extends String.Slice where /-- `isUnihan` is true if the records are tab separated -/ isUnihan := false deriving Inhabited @@ -20,23 +22,23 @@ deriving Inhabited namespace UCDStream /-- Make a `UCDStream` from a string slice -/ -def ofStringSlice (str : String.Slice) : UCDStream := { str with } +public def ofStringSlice (str : String.Slice) : UCDStream := { str with } /-- Make a `UCDStream` from a string -/ -def ofString (str : String) : UCDStream := ofStringSlice str.toSlice +public def ofString (str : String) : UCDStream := ofStringSlice str.toSlice /-- Make a `UCDStream` from a substring -/ -def ofSubstring (str : Substring.Raw) : UCDStream := ofStringSlice str.toString.toSlice +public def ofSubstring (str : Substring.Raw) : UCDStream := ofStringSlice str.toString.toSlice /-- Make a `UCDStream` from a file -/ -def ofFile (path : System.FilePath) : IO UCDStream := +public def ofFile (path : System.FilePath) : IO UCDStream := ofString <$> IO.FS.readFile path /-- Get the next line from the `UCDStream` Line comments are stripped and blank lines are skipped. -/ -protected partial def nextLine? (stream : UCDStream) : Option (String.Slice × UCDStream) := do +private partial def nextLine? (stream : UCDStream) : Option (String.Slice × UCDStream) := do let line := stream.trimAsciiEnd.takeWhile (.!='\n') if h : line.rawEndPos < stream.rawEndPos then let nextPos := stream.findNextPos line.rawEndPos h @@ -51,7 +53,7 @@ protected partial def nextLine? (stream : UCDStream) : Option (String.Slice × U Spaces around field values are trimmed. -/ -protected def next? (stream : UCDStream) : Option (Array String.Slice × UCDStream) := do +private def next? (stream : UCDStream) : Option (Array String.Slice × UCDStream) := do let sep := if stream.isUnihan then "\t" else ";" let mut arr := #[] let (line, table) ← stream.nextLine? @@ -59,9 +61,5 @@ protected def next? (stream : UCDStream) : Option (Array String.Slice × UCDStre arr := arr.push item.trimAscii return (arr, table) -instance : Std.Stream UCDStream (Array String.Slice) where - next? := UCDStream.next? - -end UCDStream - -end Unicode +public instance : Std.Stream UCDStream (Array String.Slice) where + next? := private UCDStream.next? diff --git a/UnicodeBasic/Hangul.lean b/UnicodeBasic/Hangul.lean index e1b7f9e18..8bd4acb9d 100644 --- a/UnicodeBasic/Hangul.lean +++ b/UnicodeBasic/Hangul.lean @@ -3,32 +3,34 @@ Copyright © 2024 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ +module + namespace Unicode.Hangul -private def shortNameL : Array String := +def shortNameL : Array String := #["G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"] -private def shortNameV : Array String := +def shortNameV : Array String := #["A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", "YU", "EU", "YI", "I"] -private def shortNameT : Array String := +def shortNameT : Array String := #["", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H"] -private abbrev sizeL := shortNameL.size -- 19 -private abbrev sizeV := shortNameV.size -- 21 -private abbrev sizeT := shortNameT.size -- 28 -private abbrev sizeLV := sizeL * sizeV -- 399 -private abbrev sizeVT := sizeV * sizeT -- 588 -private abbrev sizeLVT := sizeL * sizeV * sizeT -- 11172 +abbrev sizeL := shortNameL.size -- 19 +abbrev sizeV := shortNameV.size -- 21 +abbrev sizeT := shortNameT.size -- 28 +abbrev sizeLV := sizeL * sizeV -- 399 +abbrev sizeVT := sizeV * sizeT -- 588 +abbrev sizeLVT := sizeL * sizeV * sizeT -- 11172 -private abbrev baseL : UInt32 := 0x1100 -private abbrev baseV : UInt32 := 0x1161 -private abbrev baseT : UInt32 := 0x11A7 +abbrev baseL : UInt32 := 0x1100 +abbrev baseV : UInt32 := 0x1161 +abbrev baseT : UInt32 := 0x11A7 /-- Number of Hangul syllables -/ def Syllable.size := sizeLVT @@ -40,13 +42,13 @@ def Syllable.base : UInt32 := 0xAC00 def Syllable.last : UInt32 := ⟨0xAC00 + Syllable.size - 1, by decide⟩ /-- Hangul syllable type -/ -structure Syllable where +public structure Syllable where /-- L part of syllable -/ - toL : Fin sizeL + private toL : Fin sizeL /-- V part of syllable -/ - toV : Fin sizeV + private toV : Fin sizeV /-- T part of syllable (0 means none) -/ - toT : Fin sizeT := ⟨0, by decide⟩ + private toT : Fin sizeT := ⟨0, by decide⟩ deriving DecidableEq, Repr instance : Inhabited Syllable where @@ -71,28 +73,27 @@ def Syllable.index (s : Syllable) : Fin Syllable.size := ⟨lv * sizeT + t, this⟩ /-- Get short name for Hangul syllable -/ -@[inline] -def Syllable.getShortName (s : Syllable) : String := shortNameL[s.toL] ++ shortNameV[s.toV] ++ shortNameT[s.toT] +public def Syllable.getShortName (s : Syllable) : String := shortNameL[s.toL] ++ shortNameV[s.toV] ++ shortNameT[s.toT] /-- Get L part character for syllable -/ -def Syllable.getLChar (s : Syllable) : Char := +public def Syllable.getLChar (s : Syllable) : Char := .ofNat (baseL.toNat + s.toL) /-- Get V part character for syllable -/ -def Syllable.getVChar (s : Syllable) : Char := +public def Syllable.getVChar (s : Syllable) : Char := .ofNat (baseV.toNat + s.toV) /-- Get LV part character for syllable -/ -def Syllable.getLVChar (s : Syllable) : Char := +public def Syllable.getLVChar (s : Syllable) : Char := .ofNat (base.toNat + sizeT * s.toLV) /-- Get T part character for syllable -/ -def Syllable.getTChar? (s : Syllable) : Option Char := +public def Syllable.getTChar? (s : Syllable) : Option Char := if s.toT.val == 0 then none else return .ofNat (baseT.toNat + s.toT) /-- Get Hangul syllable from code point -/ -def getSyllable? (code : UInt32) : Option Syllable := +public def getSyllable? (code : UInt32) : Option Syllable := if code < 0xAC00 then none else let index := (code - 0xAC00).toNat if h : index ≥ Syllable.size then none else @@ -108,9 +109,7 @@ def getSyllable? (code : UInt32) : Option Syllable := some ⟨⟨lpart, lislt⟩, ⟨vpart, vislt⟩, ⟨tpart, tislt⟩⟩ @[inherit_doc getSyllable?] -def getSyllable! (code : UInt32) : Syllable := +public def getSyllable! (code : UInt32) : Syllable := match getSyllable? code with | some s => s | none => panic! "not a Hangul syllable" - -end Unicode.Hangul diff --git a/UnicodeBasic/TableLookup.lean b/UnicodeBasic/TableLookup.lean index bf66cc859..6141c362b 100644 --- a/UnicodeBasic/TableLookup.lean +++ b/UnicodeBasic/TableLookup.lean @@ -3,16 +3,20 @@ Copyright © 2024-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ -import UnicodeBasic.CharacterDatabase -import UnicodeBasic.Hangul -import UnicodeBasic.Types +module + +import all UnicodeBasic.CharacterDatabase +import all UnicodeBasic.Hangul +import all UnicodeBasic.Types + +public import UnicodeBasic.Types namespace Unicode namespace CLib @[extern "unicode_case_lookup"] -protected opaque lookupCase (c : UInt32) : UInt64 +public protected opaque lookupCase (c : UInt32) : UInt64 protected abbrev oUpper : UInt64 := 0x100000000 protected abbrev oLower : UInt64 := 0x200000000 @@ -20,7 +24,7 @@ protected abbrev oAlpha : UInt64 := 0x400000000 protected abbrev oMath : UInt64 := 0x800000000 @[extern "unicode_prop_lookup"] -protected opaque lookupProp (c : UInt32) : UInt64 +public protected opaque lookupProp (c : UInt32) : UInt64 end CLib @@ -69,7 +73,7 @@ private def parsePropTable (s : String) : Thunk <| Array (UInt32 × UInt32) := I /-- Get bidirectional class using lookup table Unicode property: `Bidi_Class` -/ -def lookupBidiClass (c : UInt32) : BidiClass := +public def lookupBidiClass (c : UInt32) : BidiClass := let table := table.get if c < table[0]!.1 then .BN else match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with @@ -82,7 +86,7 @@ where /-- Get canonical combining class using lookup table Unicode property: `Canonical_Combining_Class` -/ -def lookupCanonicalCombiningClass (c : UInt32) : Nat := +public def lookupCanonicalCombiningClass (c : UInt32) : Nat := let t := table.get if c < t[0]!.1 then 0 else match t[find c (fun i => t[i]!.1) 0 t.size.toUSize]! with @@ -97,7 +101,7 @@ where Unicode properties: `Decomposition_Mapping` `Decomposition_Type=Canonical` -/ -def lookupCanonicalDecompositionMapping (c : UInt32) : List UInt32 := +public def lookupCanonicalDecompositionMapping (c : UInt32) : List UInt32 := -- Hangul syllables if Hangul.Syllable.base ≤ c && c ≤ Hangul.Syllable.last then let s := Hangul.getSyllable! c @@ -120,7 +124,7 @@ where `Simple_Lowercase_Mapping` `Simple_Uppercase_Mapping` `Simple_Titlecase_Mapping` -/ -def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 := +public def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 := let v : UInt64 := CLib.lookupCase c if v == 0 then (c, c, c) else let cu : UInt32 := v.toUInt32 &&& 0x001FFFFF @@ -133,7 +137,7 @@ def lookupCaseMapping (c : UInt32) : UInt32 × UInt32 × UInt32 := Unicode properties: `Decomposition_Mapping` `Decomposition_Type` -/ -def lookupDecompositionMapping? (c : UInt32) : Option DecompositionMapping := +public def lookupDecompositionMapping? (c : UInt32) : Option DecompositionMapping := -- Hangul syllables if Hangul.Syllable.base ≤ c && c ≤ Hangul.Syllable.last then let s := Hangul.getSyllable! c @@ -178,12 +182,12 @@ where Unicode property: `General_Category` -/ @[inline] -def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32 +public def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32 /-- Get name of a code point using lookup table Unicode property: `Name` -/ -def lookupName (c : UInt32) : String := +public def lookupName (c : UInt32) : String := let table := table.get if c < table[0]!.1 then unreachable! else match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with @@ -235,7 +239,7 @@ where Unicode properties: `Numeric_Type` `Numeric_Value` -/ -def lookupNumericValue (c : UInt32) : Option NumericType := +public def lookupNumericValue (c : UInt32) : Option NumericType := let table := table.get if c < table[0]!.1 then none else match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with @@ -292,7 +296,7 @@ def lookupOther (c : UInt32) : UInt32 := Unicode property: `Alphabetic` -/ @[inline] -def lookupAlphabetic (c : UInt32) : Bool := +public def lookupAlphabetic (c : UInt32) : Bool := let m := CLib.oAlpha ||| (GC.L ||| GC.Nl).toUInt64 CLib.lookupProp c &&& m != 0 @@ -300,7 +304,8 @@ def lookupAlphabetic (c : UInt32) : Bool := Unicode property: `Bidi_Mirrored` -/ -def lookupBidiMirrored (c : UInt32) : Bool := +@[inline] +public def lookupBidiMirrored (c : UInt32) : Bool := let table := table.get if c < table[0]!.1 then false else match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with @@ -313,7 +318,7 @@ where Unicode property: `Cased` -/ @[inline] -def lookupCased (c : UInt32 ) : Bool := +public def lookupCased (c : UInt32 ) : Bool := let m := CLib.oUpper ||| CLib.oLower ||| GC.LC.toUInt64 CLib.lookupProp c &&& m != 0 @@ -321,7 +326,7 @@ def lookupCased (c : UInt32 ) : Bool := Unicode property: `Lowercase` -/ @[inline] -def lookupLowercase (c : UInt32) : Bool := +public def lookupLowercase (c : UInt32) : Bool := let m := CLib.oLower ||| GC.Ll.toUInt64 CLib.lookupProp c &&& m != 0 @@ -330,7 +335,7 @@ def lookupLowercase (c : UInt32) : Bool := Unicode property: `Math` -/ @[inline] -def lookupMath (c : UInt32) : Bool := +public def lookupMath (c : UInt32) : Bool := let m := CLib.oMath ||| GC.Sm.toUInt64 CLib.lookupProp c &&& m != 0 @@ -338,21 +343,21 @@ def lookupMath (c : UInt32) : Bool := Unicode property: `Titlecase` -/ @[inline] -def lookupTitlecase (c : UInt32) : Bool := +public def lookupTitlecase (c : UInt32) : Bool := lookupGC c == GC.Lt /-- Check if code point is a uppercase letter using lookup table Unicode property: `Uppercase` -/ @[inline] -def lookupUppercase (c : UInt32) : Bool := +public def lookupUppercase (c : UInt32) : Bool := let m := CLib.oUpper ||| GC.Lu.toUInt64 CLib.lookupProp c &&& m != 0 /-- Check if code point is a white space character using lookup table Unicode property: `White_Space` -/ -def lookupWhiteSpace (c : UInt32) : Bool := +public def lookupWhiteSpace (c : UInt32) : Bool := let table := table.get if c < table[0]!.1 then false else match table[find c (fun i => table[i]!.1) 0 table.size.toUSize]! with @@ -360,5 +365,3 @@ def lookupWhiteSpace (c : UInt32) : Bool := where str : String := include_str "../data/table/White_Space.txt" table : Thunk <| Array (UInt32 × UInt32) := parsePropTable str - -end Unicode diff --git a/UnicodeBasic/Types.lean b/UnicodeBasic/Types.lean index f3b863de4..08f7cf95b 100644 --- a/UnicodeBasic/Types.lean +++ b/UnicodeBasic/Types.lean @@ -3,14 +3,16 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ +module + /-- Low-level conversion from `UInt32` to `Char` (*unsafe*) This function translates to a no-op in the compiler. However, it does not check whether the `UInt32` code is a valid `Char` value. Only use where it's known for external reasons that the code is valid. -/ -protected unsafe def Char.mkUnsafe : UInt32 → Char := unsafeCast +public protected unsafe def Char.mkUnsafe : UInt32 → Char := unsafeCast -namespace Unicode +public section namespace Unicode -- forward-port: lean4#11341 /-- Coercion from `String` to `String.Slice` -/ @@ -191,7 +193,7 @@ deriving DecidableEq /-- General category (GC) Unicode property: `General_Category` -/ -def GC := UInt32 deriving DecidableEq, Inhabited +@[expose] def GC := UInt32 deriving DecidableEq, Inhabited namespace GC @@ -207,73 +209,73 @@ instance : HasSubset GC where instance (x y : GC) : Decidable (x ⊆ y) := inferInstanceAs (Decidable (_ == _)) -protected def none : GC := (0x00000000 : UInt32) -protected def univ : GC := (0x3FFFFFFF : UInt32) - -protected def Lu : GC := (0x00000001 : UInt32) -protected def Ll : GC := (0x00000002 : UInt32) -protected def Lt : GC := (0x00000004 : UInt32) -protected def Lm : GC := (0x00000008 : UInt32) -protected def Lo : GC := (0x00000010 : UInt32) -protected def LC : GC := .Lu ||| .Ll ||| .Lt -protected def L : GC := .Lu ||| .Ll ||| .Lt ||| .Lm ||| .Lo - -protected def Mn : GC := (0x00000020 : UInt32) -protected def Mc : GC := (0x00000040 : UInt32) -protected def Me : GC := (0x00000080 : UInt32) -protected def M : GC := .Mn ||| .Mc ||| .Me - -protected def Nd : GC := (0x00000100 : UInt32) -protected def Nl : GC := (0x00000200 : UInt32) -protected def No : GC := (0x00000400 : UInt32) -protected def N : GC := .Nd ||| .Nl ||| .No - -protected def Pc : GC := (0x00000800 : UInt32) -protected def Pd : GC := (0x00001000 : UInt32) -protected def Ps : GC := (0x00002000 : UInt32) -protected def Pe : GC := (0x00004000 : UInt32) -protected def Pi : GC := (0x00008000 : UInt32) -protected def Pf : GC := (0x00010000 : UInt32) -protected def Po : GC := (0x00020000 : UInt32) -protected def PG : GC := .Ps ||| .Pe -protected def PQ : GC := .Pi ||| .Pf -protected def P : GC := .Pc ||| .Pd ||| .Ps ||| .Pe ||| .Pi ||| .Pf ||| .Po - -protected def Sm : GC := (0x00040000 : UInt32) -protected def Sc : GC := (0x00080000 : UInt32) -protected def Sk : GC := (0x00100000 : UInt32) -protected def So : GC := (0x00200000 : UInt32) -protected def S : GC := .Sm ||| .Sc ||| .Sk ||| .So - -protected def Zs : GC := (0x00400000 : UInt32) -protected def Zl : GC := (0x00800000 : UInt32) -protected def Zp : GC := (0x01000000 : UInt32) -protected def Z : GC := .Zs ||| .Zl ||| .Zp - -protected def Cc : GC := (0x02000000 : UInt32) -protected def Cf : GC := (0x04000000 : UInt32) -protected def Cs : GC := (0x08000000 : UInt32) -protected def Co : GC := (0x10000000 : UInt32) -protected def Cn : GC := (0x20000000 : UInt32) -protected def C : GC := .Cc ||| .Cf ||| .Cs ||| .Co ||| .Cn +@[expose] protected def none : GC := (0x00000000 : UInt32) +@[expose] protected def univ : GC := (0x3FFFFFFF : UInt32) + +@[expose] protected def Lu : GC := (0x00000001 : UInt32) +@[expose] protected def Ll : GC := (0x00000002 : UInt32) +@[expose] protected def Lt : GC := (0x00000004 : UInt32) +@[expose] protected def Lm : GC := (0x00000008 : UInt32) +@[expose] protected def Lo : GC := (0x00000010 : UInt32) +@[expose] protected def LC : GC := .Lu ||| .Ll ||| .Lt +@[expose] protected def L : GC := .Lu ||| .Ll ||| .Lt ||| .Lm ||| .Lo + +@[expose] protected def Mn : GC := (0x00000020 : UInt32) +@[expose] protected def Mc : GC := (0x00000040 : UInt32) +@[expose] protected def Me : GC := (0x00000080 : UInt32) +@[expose] protected def M : GC := .Mn ||| .Mc ||| .Me + +@[expose] protected def Nd : GC := (0x00000100 : UInt32) +@[expose] protected def Nl : GC := (0x00000200 : UInt32) +@[expose] protected def No : GC := (0x00000400 : UInt32) +@[expose] protected def N : GC := .Nd ||| .Nl ||| .No + +@[expose] protected def Pc : GC := (0x00000800 : UInt32) +@[expose] protected def Pd : GC := (0x00001000 : UInt32) +@[expose] protected def Ps : GC := (0x00002000 : UInt32) +@[expose] protected def Pe : GC := (0x00004000 : UInt32) +@[expose] protected def Pi : GC := (0x00008000 : UInt32) +@[expose] protected def Pf : GC := (0x00010000 : UInt32) +@[expose] protected def Po : GC := (0x00020000 : UInt32) +@[expose] protected def PG : GC := .Ps ||| .Pe +@[expose] protected def PQ : GC := .Pi ||| .Pf +@[expose] protected def P : GC := .Pc ||| .Pd ||| .Ps ||| .Pe ||| .Pi ||| .Pf ||| .Po + +@[expose] protected def Sm : GC := (0x00040000 : UInt32) +@[expose] protected def Sc : GC := (0x00080000 : UInt32) +@[expose] protected def Sk : GC := (0x00100000 : UInt32) +@[expose] protected def So : GC := (0x00200000 : UInt32) +@[expose] protected def S : GC := .Sm ||| .Sc ||| .Sk ||| .So + +@[expose] protected def Zs : GC := (0x00400000 : UInt32) +@[expose] protected def Zl : GC := (0x00800000 : UInt32) +@[expose] protected def Zp : GC := (0x01000000 : UInt32) +@[expose] protected def Z : GC := .Zs ||| .Zl ||| .Zp + +@[expose] protected def Cc : GC := (0x02000000 : UInt32) +@[expose] protected def Cf : GC := (0x04000000 : UInt32) +@[expose] protected def Cs : GC := (0x08000000 : UInt32) +@[expose] protected def Co : GC := (0x10000000 : UInt32) +@[expose] protected def Cn : GC := (0x20000000 : UInt32) +@[expose] protected def C : GC := .Cc ||| .Cf ||| .Cs ||| .Co ||| .Cn def mk : (major : MajorGeneralCategory) → Option (MinorGeneralCategory major) → GC -| .letter, none => .L +| .letter, .none => .L | _, some .casedLetter => .LC | _, some .uppercaseLetter => .Lu | _, some .lowercaseLetter => .Ll | _, some .titlecaseLetter => .Lt | _, some .modifierLetter => .Lm | _, some .otherLetter => .Lo -| .mark, none => .M +| .mark, .none => .M | _, some .nonspacingMark => .Mn | _, some .spacingMark => .Mc | _, some .enclosingMark => .Me -| .number, none => .N +| .number, .none => .N | _, some .decimalNumber => .Nd | _, some .letterNumber => .Nl | _, some .otherNumber => .No -| .punctuation, none => .P +| .punctuation, .none => .P | _, some .connectorPunctuation => .Pc | _, some .dashPunctuation => .Pd | _, some .groupPunctuation => .PG @@ -283,23 +285,23 @@ def mk : (major : MajorGeneralCategory) → Option (MinorGeneralCategory major) | _, some .initialPunctuation => .Pi | _, some .finalPunctuation => .Pf | _, some .otherPunctuation => .Po -| .symbol, none => .S +| .symbol, .none => .S | _, some .mathSymbol => .Sm | _, some .currencySymbol => .Sc | _, some .modifierSymbol => .Sk | _, some .otherSymbol => .So -| .separator, none => .Z +| .separator, .none => .Z | _, some .spaceSeparator => .Zs | _, some .lineSeparator => .Zl | _, some .paragraphSeparator => .Zp -| .other, none => .C +| .other, .none => .C | _, some .control => .Cc | _, some .format => .Cf | _, some .surrogate => .Cs | _, some .privateUse => .Co | _, some .unassigned => .Cn -private def reprAux (x : GC) (extra := false) : List String := Id.run do +def reprAux (x : GC) (extra := false) : List String := Id.run do let mut c := #[] if .L ⊆ x then c := c.push "L" @@ -452,7 +454,7 @@ def ofAbbrev? (s : String.Slice) : Option GC := def ofAbbrev! (s : String.Slice) : GC := match ofAbbrev? s with | some c => c - | none => panic! "invalid general category" + | _ => panic! "invalid general category" def ofString? (s : String.Slice) : Option GC := do let mut c := .none @@ -463,7 +465,7 @@ def ofString? (s : String.Slice) : Option GC := do def ofString! (s : String.Slice) : GC := match ofString? s with | some c => c - | none => panic! "invalid general category" + | _ => panic! "invalid general category" end GC @@ -754,5 +756,3 @@ def BidiClass.ofAbbrev! (abbr : String.Slice) : BidiClass := instance : Repr BidiClass where reprPrec bc _ := s!"Unicode.BidiClass.{bc.toAbbrev}" - -end Unicode diff --git a/UnicodeData.lean b/UnicodeData.lean index 99b106182..b03f3ca49 100644 --- a/UnicodeData.lean +++ b/UnicodeData.lean @@ -2,6 +2,7 @@ Copyright © 2023-2024 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ +module -import UnicodeData.Basic -import UnicodeData.PropList +public import UnicodeData.Basic +public import UnicodeData.PropList diff --git a/UnicodeData/Basic.lean b/UnicodeData/Basic.lean index 20b06e55f..85d576cd8 100644 --- a/UnicodeData/Basic.lean +++ b/UnicodeData/Basic.lean @@ -3,9 +3,12 @@ Copyright © 2023-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ -import UnicodeBasic.CharacterDatabase -import UnicodeBasic.Hangul -import UnicodeBasic.Types +module + +public import UnicodeBasic.Types + +import all UnicodeBasic.CharacterDatabase +import all UnicodeBasic.Hangul -- Issue: lean4#11275 namespace String.Slice @@ -24,7 +27,7 @@ end String.Slice namespace Unicode /-- Structure for data from `UnicodeData.txt` -/ -structure UnicodeData where +public structure UnicodeData where /-- Code Value -/ code : UInt32 /-- Character Name -/ @@ -58,7 +61,7 @@ instance : Inhabited UnicodeData where } /-- Make `UnicodeData` for noncharacter code point -/ -def UnicodeData.mkNoncharacter (code : UInt32) : UnicodeData where +public def UnicodeData.mkNoncharacter (code : UInt32) : UnicodeData where code := code name := -- Extracted from property `Noncharacter_Code_Point` @@ -129,7 +132,7 @@ def UnicodeData.mkTangutIdeograph (c : UInt32) : UnicodeData where protected def UnicodeData.txt := include_str "../data/UnicodeData.txt" /-- Parse `UnicodeData.txt` -/ -private unsafe def UnicodeData.init : IO (Array UnicodeData) := do +public unsafe def UnicodeData.init : IO (Array UnicodeData) := do let stream := UCDStream.ofString UnicodeData.txt let mut arr := #[] for record in stream do @@ -256,10 +259,10 @@ where /-- Parsed data from `UnicodeData.txt` -/ @[init UnicodeData.init] -protected def UnicodeData.data : Array UnicodeData := #[] +public protected def UnicodeData.data : Array UnicodeData := #[] /-- Get code point data from `UnicodeData.txt` -/ -partial def getUnicodeData? (code : UInt32) : Option UnicodeData := do +public partial def getUnicodeData? (code : UInt32) : Option UnicodeData := do if code > Unicode.max then none else if code ≤ 0x0377 then @@ -338,25 +341,24 @@ where find mid hi @[inherit_doc getUnicodeData?] -def getUnicodeData! (code : UInt32) := +public def getUnicodeData! (code : UInt32) := match getUnicodeData? code with | some data => data | none => panic! "code point out of range" /-- Get character data from `UnicodeData.txt` -/ -def getUnicodeData (char : Char) : UnicodeData := +public def getUnicodeData (char : Char) : UnicodeData := match getUnicodeData? char.val with | some data => data | none => unreachable! /-- Stream type to roll through all code points up to `Unicode.max`, yielding `UnicodeData` -/ -structure UnicodeDataStream where +public structure UnicodeDataStream where code : UInt32 := 0 index : USize := 0 default : UInt32 → UnicodeData := UnicodeData.mkNoncharacter - deriving Inhabited -private def UnicodeDataStream.next? (s : UnicodeDataStream) : Option (UnicodeData × UnicodeDataStream) := do +public def UnicodeDataStream.next? (s : UnicodeDataStream) : Option (UnicodeData × UnicodeDataStream) := do let c := s.code let i := s.index if c > Unicode.max then @@ -394,7 +396,5 @@ private def UnicodeDataStream.next? (s : UnicodeDataStream) : Option (UnicodeDat else return (.mkNoncharacter c, {s with code := c+1}) -instance : Std.Stream UnicodeDataStream UnicodeData where +public instance : Std.Stream UnicodeDataStream UnicodeData where next? := UnicodeDataStream.next? - -end Unicode diff --git a/UnicodeData/PropList.lean b/UnicodeData/PropList.lean index d3672e56a..a32fbc73e 100644 --- a/UnicodeData/PropList.lean +++ b/UnicodeData/PropList.lean @@ -3,13 +3,15 @@ Copyright © 2023-2024 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ +module + import UnicodeBasic.Types -import UnicodeBasic.CharacterDatabase +import all UnicodeBasic.CharacterDatabase namespace Unicode /-- Structure containing supported character properties from `PropList.txt` -/ -structure PropList where +public structure PropList where /-- property `Noncharacter_Code_Point` -/ noncharacterCodePoint : Array (UInt32 × Option UInt32) := #[] /-- property `White_Space` -/ @@ -27,7 +29,7 @@ deriving Inhabited, Repr /-- Raw string form `PropList.txt` -/ protected def PropList.txt := include_str "../data/PropList.txt" -private unsafe def PropList.init : IO PropList := do +public unsafe def PropList.init : IO PropList := do let stream := UCDStream.ofString PropList.txt let mut list : PropList := {} for record in stream do @@ -52,7 +54,7 @@ private unsafe def PropList.init : IO PropList := do /-- Parsed data from `PropList.txt` -/ @[init PropList.init] -protected def PropList.data : PropList := {} +public protected def PropList.data : PropList := {} -- TODO: stop reinventing the wheel! /-- Binary search -/ @@ -71,7 +73,7 @@ private partial def find (code : UInt32) (data : Array (UInt32 × Option UInt32) /-- Check if code point has `White_Space` property from `PropList.txt` -/ @[inline] -def PropList.isWhiteSpace (code : UInt32) : Bool := +public def PropList.isWhiteSpace (code : UInt32) : Bool := let data := PropList.data.whiteSpace if data.size == 0 || code < data[0]!.fst then false else match data[find code data 0 data.size]! with @@ -80,7 +82,7 @@ def PropList.isWhiteSpace (code : UInt32) : Bool := /-- Check if code point has `Other_Math` property from `PropList.txt` -/ @[inline] -def PropList.isOtherMath (code : UInt32) : Bool := +public def PropList.isOtherMath (code : UInt32) : Bool := let data := PropList.data.otherMath if data.size == 0 || code < data[0]!.fst then false else match data[find code data 0 data.size]! with @@ -89,7 +91,7 @@ def PropList.isOtherMath (code : UInt32) : Bool := /-- Check if code point has `Other_Alphabetic` property from `PropList.txt` -/ @[inline] -def PropList.isOtherAlphabetic (code : UInt32) : Bool := +public def PropList.isOtherAlphabetic (code : UInt32) : Bool := let data := PropList.data.otherAlphabetic if data.size == 0 || code < data[0]!.fst then false else match data[find code data 0 data.size]! with @@ -98,7 +100,7 @@ def PropList.isOtherAlphabetic (code : UInt32) : Bool := /-- Check if code point has `Other_Lowercase` property from `PropList.txt` -/ @[inline] -def PropList.isOtherLowercase (code : UInt32) : Bool := +public def PropList.isOtherLowercase (code : UInt32) : Bool := let data := PropList.data.otherLowercase if data.size == 0 || code < data[0]!.fst then false else match data[find code data 0 data.size]! with @@ -107,11 +109,9 @@ def PropList.isOtherLowercase (code : UInt32) : Bool := /-- Check if code point has `Other_Uppercase` property from `PropList.txt` -/ @[inline] -def PropList.isOtherUppercase (code : UInt32) : Bool := +public def PropList.isOtherUppercase (code : UInt32) : Bool := let data := PropList.data.otherUppercase if data.size == 0 || code < data[0]!.fst then false else match data[find code data 0 data.size]! with | (val, none) => code == val | (_, some top) => code <= top - -end Unicode diff --git a/lakefile.lean b/lakefile.lean index 78f010f03..51be1d0e5 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -31,8 +31,10 @@ extern_lib libunicodeclib := UnicodeCLib.fetch @[default_target] lean_lib UnicodeBasic where moreLinkObjs := #[UnicodeCLib] + leanOptions := #[⟨`experimental.module, true⟩] -lean_lib UnicodeData +lean_lib UnicodeData where + leanOptions := #[⟨`experimental.module, true⟩] lean_exe lookup @@ -41,7 +43,8 @@ lean_exe makeTables lean_exe makeCLib @[test_driver] -lean_exe testTables +lean_exe testTables where + leanOptions := #[⟨`experimental.module, true⟩] /-- Download datafile from the Unicode Character Database (UCD) -/ script downloadUCD (args) do diff --git a/makeCLib.lean b/makeCLib.lean index 0157bdcbc..899e5ddc2 100644 --- a/makeCLib.lean +++ b/makeCLib.lean @@ -1,3 +1,8 @@ +/- +Copyright © 2025 François G. Dorais. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +-/ + import UnicodeData open Unicode diff --git a/testTables.lean b/testTables.lean index 75fdaa406..5c310166a 100644 --- a/testTables.lean +++ b/testTables.lean @@ -3,8 +3,10 @@ Copyright © 2024-2025 François G. Dorais. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. -/ -import UnicodeBasic -import UnicodeData +module + +public import UnicodeBasic +public import UnicodeData open Unicode @@ -96,7 +98,7 @@ def testUppercase (d : UnicodeData) : Bool := def testWhiteSpace (d : UnicodeData) : Bool := PropList.isWhiteSpace d.code == lookupWhiteSpace d.code -def tests : Array (String × (UnicodeData → Bool)) := #[ +public def tests : Array (String × (UnicodeData → Bool)) := #[ ("Bidi_Class", testBidiClass), ("Alphabetic", testAlphabetic), ("Bidi_Class", testBidiClass), @@ -115,7 +117,7 @@ def tests : Array (String × (UnicodeData → Bool)) := #[ ("General_Category", testGeneralCategory), ("White_Space", testWhiteSpace)] -def main (args : List String) : IO UInt32 := do +public def main (args : List String) : IO UInt32 := do let args := if args.isEmpty then tests.map Prod.fst else args.toArray let stream : UnicodeDataStream := {} let mut err : UInt32 := 0