Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 0 additions & 141 deletions UnicodeBasic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -114,32 +114,11 @@ where
.Sk, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll,
.Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ll, .Ps, .Sm, .Po, .Sm, .Cc]

set_option linter.deprecated false in
@[deprecated Unicode.getGC (since := "1.2.0")]
def getGeneralCategory (char : Char) : GeneralCategory :=
.ofGC! (getGC char)

instance : Membership Char GC where
mem cat char := getGC char ⊆ cat

instance (char : Char) (cat : GC) : Decidable (char ∈ cat) := inferInstanceAs (Decidable (_ ⊆ _))

set_option linter.deprecated false in
@[deprecated "use `char ∈ category` instead" (since := "1.2.0")]
def isInGeneralCategory (char : Char) (category : GeneralCategory) : Bool :=
match category, getGeneralCategory char with
| ⟨major, none⟩, ⟨charMajor, _⟩ => major == charMajor
| ⟨_, some .casedLetter⟩, ⟨_, some .lowercaseLetter⟩ => true
| ⟨_, some .casedLetter⟩, ⟨_, some .titlecaseLetter⟩ => true
| ⟨_, some .casedLetter⟩, ⟨_, some .uppercaseLetter⟩ => true
| ⟨_, some .casedLetter⟩, _ => false
| ⟨_, some .groupPunctuation⟩, ⟨_, some .openPunctuation⟩ => true
| ⟨_, some .groupPunctuation⟩, ⟨_, some .closePunctuation⟩ => true
| ⟨_, some .groupPunctuation⟩, _ => false
| ⟨_, some .quotePunctuation⟩, ⟨_, some .initialPunctuation⟩ => true
| ⟨_, some .quotePunctuation⟩, ⟨_, some .finalPunctuation⟩ => true
| ⟨_, some .quotePunctuation⟩, _ => false
| cat, charCat => cat == charCat

namespace GeneralCategory

Expand All @@ -150,341 +129,221 @@ namespace GeneralCategory
Unicode Property: `General_Category=L` -/
abbrev isLetter (char : Char) : Bool := char ∈ Unicode.GC.L

@[deprecated "c ∈ Unicode.GC.L" (since := "1.2.0")]
protected abbrev isL := isLetter

/-- Check if lowercase letter character (`Ll`)

Unicode Property: `General_Category=Ll` -/
abbrev isLowercaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Ll

@[deprecated "c ∈ Unicode.GC.Ll" (since := "1.2.0")]
protected abbrev isLl := isLowercaseLetter

/-- Check if titlecase letter character (`Lt`)

Unicode Property: `General_Category=Lt` -/
abbrev isTitlecaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Lt

@[deprecated "c ∈ Unicode.GC.Lt" (since := "1.2.0")]
protected abbrev isLt := isTitlecaseLetter

/-- Check if uppercase letter character (`Lu`)

Unicode Property: `General_Category=Lu` -/
abbrev isUppercaseLetter (char : Char) : Bool := char ∈ Unicode.GC.Lu

@[deprecated "c ∈ Unicode.GC.Lu" (since := "1.2.0")]
protected abbrev isLu := isUppercaseLetter

/-- Check if cased letter character (`LC`)

This is a derived category (`L = Lu | Ll | Lt`).

Unicode Property: `General_Category=LC` -/
abbrev isCasedLetter (char : Char) : Bool := char ∈ Unicode.GC.LC

@[deprecated "c ∈ Unicode.GC.LC" (since := "1.2.0")]
protected abbrev isLC := isCasedLetter

/-- Check if modifier letter character (`Lm`)

Unicode Property: `General_Category=Lm`-/
abbrev isModifierLetter (char : Char) : Bool := char ∈ Unicode.GC.Lm

@[deprecated "c ∈ Unicode.GC.Lm" (since := "1.2.0")]
protected abbrev isLm := isModifierLetter

/-- Check if other letter character (`Lo`)

Unicode Property: `General_Category=Lo`-/
abbrev isOtherLetter (char : Char) : Bool := char ∈ Unicode.GC.Lo

@[deprecated "c ∈ Unicode.GC.Lo" (since := "1.2.0")]
protected abbrev isLo := isOtherLetter

/-- Check if mark character (`M`)

This is a derived category (`M = Mn | Mc | Me`).

Unicode Property: `General_Category=M` -/
abbrev isMark (char : Char) : Bool := char ∈ Unicode.GC.M

@[deprecated "c ∈ Unicode.GC.M" (since := "1.2.0")]
protected abbrev isM := isMark

/-- Check if nonspacing combining mark character (`Mn`)

Unicode Property: `General_Category=Mn` -/
abbrev isNonspacingMark (char : Char) : Bool := char ∈ Unicode.GC.Mn

@[deprecated "c ∈ Unicode.GC.Mn" (since := "1.2.0")]
protected abbrev isMn := isNonspacingMark

/-- Check if spacing combining mark character (`Mc`)

Unicode Property: `General_Category=Mc` -/
abbrev isSpacingMark (char : Char) : Bool := char ∈ Unicode.GC.Mc

@[deprecated "c ∈ Unicode.GC.Mc" (since := "1.2.0")]
protected abbrev isMc := isSpacingMark

/-- Check if enclosing combining mark character (`Me`)

Unicode Property: `General_Category=Me` -/
abbrev isEnclosingMark (char : Char) : Bool := char ∈ Unicode.GC.Me

@[deprecated "c ∈ Unicode.GC.Me" (since := "1.2.0")]
protected abbrev isMe := isEnclosingMark

/-- Check if number character (`N`)

This is a derived category (`N = Nd | Nl | No`).

Unicode Property: `General_Category=N` -/
abbrev isNumber (char : Char) : Bool := char ∈ Unicode.GC.N

@[deprecated "c ∈ Unicode.GC.N" (since := "1.2.0")]
protected abbrev isN := isNumber

/-- Check if decimal number character (`Nd`)

Unicode Property: `General_Category=Nd` -/
abbrev isDecimalNumber (char : Char) : Bool := char ∈ Unicode.GC.Nd

@[deprecated "c ∈ Unicode.GC.Nd" (since := "1.2.0")]
protected abbrev isNd := isDecimalNumber

/-- Check if letter number character (`Nl`)

Unicode Property: `General_Category=Nl` -/
abbrev isLetterNumber (char : Char) : Bool := char ∈ Unicode.GC.Nl

@[deprecated "c ∈ Unicode.GC.Nl" (since := "1.2.0")]
protected abbrev isNl := isLetterNumber

/-- Check if other number character (`No`)

Unicode Property: `General_Category=No` -/
abbrev isOtherNumber (char : Char) : Bool := char ∈ Unicode.GC.No

@[deprecated "c ∈ Unicode.GC.No" (since := "1.2.0")]
protected abbrev isNo := isOtherNumber

/-- Check if punctuation character (`P`)

This is a derived category (`P = Pc | Pd | Ps | Pe | Pi | Pf | Po`).

Unicode Property: `General_Category=P` -/
abbrev isPunctuation (char : Char) : Bool := char ∈ Unicode.GC.P

@[deprecated "c ∈ Unicode.GC.P" (since := "1.2.0")]
protected abbrev isP := isPunctuation

/-- Check if connector punctuation character (`Pc`)

Unicode Property: `General_Category=Pc` -/
abbrev isConnectorPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pc

@[deprecated "c ∈ Unicode.GC.Pc" (since := "1.2.0")]
protected abbrev isPc := isConnectorPunctuation

/-- Check if dash punctuation character (`Pd`)

Unicode Property: `General_Category=Pd` -/
abbrev isDashPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pd

@[deprecated "c ∈ Unicode.GC.Pd" (since := "1.2.0")]
protected abbrev isPd := isDashPunctuation

/-- Check if grouping punctuation character (`PG`)

This is a derived category (`PG = Ps | Pe`).

Unicode Property: `General_Category=PG` -/
abbrev isGroupPunctuation (char : Char) : Bool := char ∈ Unicode.GC.PG

@[deprecated "c ∈ Unicode.GC.PG" (since := "1.2.0")]
protected abbrev isPG := isGroupPunctuation

/-- Check if open punctuation character (`Ps`)

Unicode Property: `General_Category=Ps` -/
abbrev isOpenPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Ps

@[deprecated "c ∈ Unicode.GC.Ps" (since := "1.2.0")]
protected abbrev isPs := isOpenPunctuation

/-- Check if close punctuation character (`Pe`)

Unicode Property: `General_Category=Pe` -/
abbrev isClosePunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pe

@[deprecated "c ∈ Unicode.GC.Pe" (since := "1.2.0")]
protected abbrev isPe := isClosePunctuation

/-- Check if quoting punctuation character (`PQ`)

This is a derived category (`PQ = Pi | Pf`).

Unicode Property: `General_Category=PQ` -/
abbrev isQuotePunctuation (char : Char) : Bool := char ∈ Unicode.GC.PQ

@[deprecated "c ∈ Unicode.GC.PQ" (since := "1.2.0")]
protected abbrev isPQ := isQuotePunctuation

/-- Check if initial punctuation character (`Pi`)

Unicode Property: `General_Category=Pi` -/
abbrev isInitialPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pi

@[deprecated "c ∈ Unicode.GC.Pi" (since := "1.2.0")]
protected abbrev isPi := isInitialPunctuation

/-- Check if initial punctuation character (`Pf`)

Unicode Property: `General_Category=Pf` -/
abbrev isFinalPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Pf

@[deprecated "c ∈ Unicode.GC.Pf" (since := "1.2.0")]
protected abbrev isPf := isFinalPunctuation

/-- Check if other punctuation character (`Po`)

Unicode Property: `General_Category=Po` -/
abbrev isOtherPunctuation (char : Char) : Bool := char ∈ Unicode.GC.Po

@[deprecated "c ∈ Unicode.GC.Po" (since := "1.2.0")]
protected abbrev isPo := isOtherPunctuation

/-- Check if symbol character (`S`)

This is a derived category (`S = Sm | Sc | Sk | So`).

Unicode Property: `General_Category=S` -/
abbrev isSymbol (char : Char) : Bool := char ∈ Unicode.GC.S

@[deprecated "c ∈ Unicode.GC.S" (since := "1.2.0")]
protected abbrev isS := isSymbol

/-- Check if math symbol character (`Sm`)

Unicode Property: `General_Category=Sm` -/
abbrev isMathSymbol (char : Char) : Bool := char ∈ Unicode.GC.Sm

@[deprecated "c ∈ Unicode.GC.Sm" (since := "1.2.0")]
protected abbrev isSm := isMathSymbol

/-- Check if currency symbol character (`Sc`)

Unicode Property: `General_Category=Sc` -/
abbrev isCurrencySymbol (char : Char) : Bool := char ∈ Unicode.GC.Sc

@[deprecated "c ∈ Unicode.GC.Sc" (since := "1.2.0")]
protected abbrev isSc := isCurrencySymbol

/-- Check if modifier symbol character (`Sk`)

Unicode Property: `General_Category=Sk` -/
abbrev isModifierSymbol (char : Char) : Bool := char ∈ Unicode.GC.Sk

@[deprecated "c ∈ Unicode.GC.Sk" (since := "1.2.0")]
protected abbrev isSk := isModifierSymbol

/-- Check if other symbol character (`So`)

Unicode Property: `General_Category=So` -/
abbrev isOtherSymbol (char : Char) : Bool := char ∈ Unicode.GC.So

@[deprecated "c ∈ Unicode.GC.So" (since := "1.2.0")]
protected abbrev isSo := isOtherSymbol

/-- Check if separator character (`Z`)

This is a derived property (`Z = Zs | Zl | Zp`).

Unicode Property: `General_Category=Z` -/
abbrev isSeparator (char : Char) : Bool := char ∈ Unicode.GC.Z

@[deprecated "c ∈ Unicode.GC.Z" (since := "1.2.0")]
protected abbrev isZ := isSeparator

/-- Check if space separator character (`Zs`)

Unicode Property: `General_Category=Zs` -/
abbrev isSpaceSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zs

@[deprecated "c ∈ Unicode.GC.Zs" (since := "1.2.0")]
protected abbrev isZs := isSpaceSeparator

/-- Check if line separator character (`Zl`)

Unicode Property: `General_Category=Zl` -/
abbrev isLineSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zl

@[deprecated "c ∈ Unicode.GC.Zl" (since := "1.2.0")]
protected abbrev isZl := isLineSeparator

/-- Check if paragraph separator character (`Zp`)

Unicode Property: `General_Category=Zp` -/
abbrev isParagraphSeparator (char : Char) : Bool := char ∈ Unicode.GC.Zp

@[deprecated "c ∈ Unicode.GC.Zp" (since := "1.2.0")]
protected abbrev isZp := isParagraphSeparator

/-- Check if other character (`C`)

This is a derived category (`C = Cc | Cf | Cs | Co | Cn`).

Unicode Property: `General_Category=C` -/
abbrev isOther (char : Char) : Bool := char ∈ Unicode.GC.C

@[deprecated "c ∈ Unicode.GC.C" (since := "1.2.0")]
protected abbrev isC := isOther

/-- Check if control character (`Cc`)

Unicode Property: `General_Category=Cc` -/
abbrev isControl (char : Char) : Bool := char ∈ Unicode.GC.Cc

@[deprecated "c ∈ Unicode.GC.Cc" (since := "1.2.0")]
protected abbrev isCc := isControl

/-- Check if format character (`Cf`)

Unicode Property: `General_Category=Cf` -/
abbrev isFormat (char : Char) : Bool := char ∈ Unicode.GC.Cf

@[deprecated "c ∈ Unicode.GC.Cf" (since := "1.2.0")]
protected abbrev isCf := isFormat

/-- Check if surrogate character (`Cs`)

Does not actually occur since Lean does not regard surrogate code points as characters.

Unicode Property: `General_Category=Cs` -/
abbrev isSurrogate (char : Char) : Bool := char ∈ Unicode.GC.Cs

@[deprecated "c ∈ Unicode.GC.Cs" (since := "1.2.0")]
protected abbrev isCs := isSurrogate

/-- Check if private use character (`Co`)

Unicode Property: `General_Category=Co` -/
abbrev isPrivateUse (char : Char) : Bool := char ∈ Unicode.GC.Co

@[deprecated "c ∈ Unicode.GC.Co" (since := "1.2.0")]
protected abbrev isCo := isPrivateUse

/-- Check if unassigned character (`Cn`)

Unicode Property: `General_Category=Cn` -/
abbrev isUnassigned (char : Char) : Bool := char ∈ Unicode.GC.Cn

@[deprecated "c ∈ Unicode.GC.Cn" (since := "1.2.0")]
protected abbrev isCn := isUnassigned

end GeneralCategory

/-!
Expand Down
5 changes: 0 additions & 5 deletions UnicodeBasic/TableLookup.lean
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,6 @@ where
@[inline]
def lookupGC (c : UInt32) : GC := CLib.lookupProp c |>.toUInt32

set_option linter.deprecated false in
@[inline, deprecated Unicode.lookupGC (since := "v1.3.0")]
def lookupGeneralCategory (c : UInt32) : GeneralCategory :=
.ofGC! (lookupGC c)

/-- Get name of a code point using lookup table

Unicode property: `Name` -/
Expand Down
Loading