Skip to content

Commit 46c2eaa

Browse files
authored
Merge pull request #5522 from xmbhasin/713-fuzzy-name-res
2 parents 7cb8cf3 + f622319 commit 46c2eaa

File tree

20 files changed

+654
-172
lines changed

20 files changed

+654
-172
lines changed

parser-typechecker/src/Unison/DataDeclaration/Dependencies.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,5 +122,6 @@ hashFieldAccessors ppe declName vars declRef dd = do
122122
effectDecls = mempty
123123
},
124124
termsByShortname = mempty,
125+
freeNameToFuzzyTermsByShortName = Map.empty,
125126
topLevelComponents = Map.empty
126127
}

parser-typechecker/src/Unison/FileParsers.hs

Lines changed: 133 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,19 @@ import Control.Monad.State (evalStateT)
1010
import Data.Foldable qualified as Foldable
1111
import Data.List (partition)
1212
import Data.List qualified as List
13+
import Data.List.NonEmpty qualified as NonEmpty
1314
import Data.Map qualified as Map
15+
import Data.Ord (clamp)
1416
import Data.Sequence qualified as Seq
1517
import Data.Set qualified as Set
18+
import Data.Text qualified as Text
1619
import Unison.ABT qualified as ABT
1720
import Unison.Blank qualified as Blank
1821
import Unison.Builtin qualified as Builtin
1922
import Unison.ConstructorReference qualified as ConstructorReference
2023
import Unison.Name (Name)
24+
import Unison.Name qualified as Name
25+
import Unison.NameSegment qualified as NameSegment
2126
import Unison.Names qualified as Names
2227
import Unison.Names.ResolvesTo (ResolvesTo (..))
2328
import Unison.Parser.Ann (Ann)
@@ -28,7 +33,7 @@ import Unison.Referent (Referent)
2833
import Unison.Referent qualified as Referent
2934
import Unison.Result (CompilerBug (..), Note (..), ResultT, pattern Result)
3035
import Unison.Result qualified as Result
31-
import Unison.Syntax.Name qualified as Name (unsafeParseVar)
36+
import Unison.Syntax.Name qualified as Name (toText, unsafeParseText, unsafeParseVar)
3237
import Unison.Syntax.Parser qualified as Parser
3338
import Unison.Term qualified as Term
3439
import Unison.Type qualified as Type
@@ -94,21 +99,56 @@ computeTypecheckingEnvironment shouldUseTndr ambientAbilities typeLookupf uf =
9499
{ ambientAbilities = ambientAbilities,
95100
typeLookup = tl,
96101
termsByShortname = Map.empty,
102+
freeNameToFuzzyTermsByShortName = Map.empty,
97103
topLevelComponents = Map.empty
98104
}
99105
ShouldUseTndr'Yes parsingEnv -> do
100-
let tm = UF.typecheckingTerm uf
101-
resolveName :: Name -> Relation Name (ResolvesTo Referent)
106+
let resolveName :: Name -> Relation Name (ResolvesTo Referent)
102107
resolveName =
103108
Names.resolveNameIncludingNames
104109
(Names.shadowing1 (Names.terms (UF.toNames uf)) (Names.terms (Parser.names parsingEnv)))
105-
(Set.map Name.unsafeParseVar (UF.toTermAndWatchNames uf))
106-
possibleDeps = do
107-
v <- Set.toList (Term.freeVars tm)
108-
let shortname = Name.unsafeParseVar v
109-
(name, ref) <- Rel.toList (resolveName shortname)
110-
[(name, shortname, ref)]
111-
possibleRefs =
110+
localNames
111+
112+
localNames = Set.map Name.unsafeParseVar (UF.toTermAndWatchNames uf)
113+
-- We exclude names from indirect dependencies (that is, dependencies under lib.*.lib)
114+
-- from fuzzy searching during name resolution, for performance reasons.
115+
-- TODO: We may consider exposing user configuration to enable searching through indirect dependencies
116+
globalNamesShadowed = excludeNamesFromIndirectDeps $ Names.shadowing (UF.toNames uf) (Parser.names parsingEnv)
117+
where
118+
excludeNamesFromIndirectDeps = Names.filter (Name.classifyNameLocation >>> excludeIndirectDeps)
119+
excludeIndirectDeps = (\case Name.NameLocation'IndirectDep -> False; _otherwise -> True)
120+
121+
freeNames :: [Name]
122+
freeNames =
123+
Name.unsafeParseVar <$> Set.toList (Term.freeVars $ UF.typecheckingTerm uf)
124+
125+
possibleDepsExact :: [(Name, Name, ResolvesTo Referent)]
126+
possibleDepsExact = do
127+
freeName <- freeNames
128+
(name, ref) <- Rel.toList (resolveName freeName)
129+
[(name, freeName, ref)]
130+
131+
getFreeNameDepsFuzzy :: Name -> [(Name, Name, ResolvesTo Referent)]
132+
getFreeNameDepsFuzzy freeName = do
133+
let wantedTopNFuzzyMatches = 3
134+
-- We use fuzzy matching by edit distance here because it is usually more appropriate
135+
-- than FZF-style fuzzy finding for offering suggestions for typos or other user errors.
136+
let fuzzyMatches =
137+
take wantedTopNFuzzyMatches $
138+
fuzzyFindByEditDistanceRanked globalNamesShadowed localNames freeName
139+
140+
let names = fuzzyMatches ^.. each . _2
141+
let resolvedNames = Rel.toList . resolveName =<< names
142+
let getShortName longname = Name.unsafeParseText (NameSegment.toUnescapedText $ Name.lastSegment longname)
143+
144+
map (\(longname, ref) -> (longname, getShortName longname, ref)) resolvedNames
145+
146+
freeNameDepsFuzzy :: Map Name [(Name, Name, ResolvesTo Referent)]
147+
freeNameDepsFuzzy =
148+
Map.fromList [(freeName, getFreeNameDepsFuzzy freeName) | freeName <- freeNames]
149+
150+
getPossibleRefs :: [(Name, Name, ResolvesTo Referent)] -> Defns (Set TermReference) (Set TypeReference)
151+
getPossibleRefs =
112152
List.foldl'
113153
( \acc -> \case
114154
(_, _, ResolvesToNamespace ref0) ->
@@ -118,30 +158,106 @@ computeTypecheckingEnvironment shouldUseTndr ambientAbilities typeLookupf uf =
118158
(_, _, ResolvesToLocal _) -> acc
119159
)
120160
(Defns Set.empty Set.empty)
121-
possibleDeps
122-
tl <- fmap (UF.declsToTypeLookup uf <>) (typeLookupf (UF.dependencies uf <> possibleRefs))
123-
let termsByShortname :: Map Name [Either Name (Typechecker.NamedReference v Ann)]
124-
termsByShortname =
161+
162+
typeLookup <-
163+
fmap
164+
(UF.declsToTypeLookup uf <>)
165+
( typeLookupf
166+
( UF.dependencies uf
167+
<> getPossibleRefs possibleDepsExact
168+
<> getPossibleRefs (join $ Map.elems freeNameDepsFuzzy)
169+
)
170+
)
171+
172+
let getTermsByShortname :: [(Name, Name, ResolvesTo Referent)] -> Map Name [Either Name (Typechecker.NamedReference v Ann)]
173+
getTermsByShortname =
125174
List.foldl'
126175
( \acc -> \case
127176
(name, shortname, ResolvesToLocal _) -> let v = Left name in Map.upsert (maybe [v] (v :)) shortname acc
128177
(name, shortname, ResolvesToNamespace ref) ->
129-
case TL.typeOfReferent tl ref of
178+
case TL.typeOfReferent typeLookup ref of
130179
Just ty ->
131180
let v = Right (Typechecker.NamedReference name ty (Context.ReplacementRef ref))
132181
in Map.upsert (maybe [v] (v :)) shortname acc
133182
Nothing -> acc
134183
)
135184
Map.empty
136-
possibleDeps
185+
186+
let termsByShortname = getTermsByShortname possibleDepsExact
187+
let freeNameToFuzzyTermsByShortName = Map.mapWithKey (\_ v -> getTermsByShortname v) freeNameDepsFuzzy
188+
137189
pure
138190
Typechecker.Env
139191
{ ambientAbilities,
140-
typeLookup = tl,
192+
typeLookup,
141193
termsByShortname,
194+
freeNameToFuzzyTermsByShortName,
142195
topLevelComponents = Map.empty
143196
}
144197

198+
-- | 'fuzzyFindByEditDistanceRanked' finds matches for the given 'name' within 'names' by edit distance.
199+
--
200+
-- Returns a list of 3-tuples composed of an edit-distance Score, a Name, and a List of term and type references.
201+
--
202+
-- Adapted from Unison.Server.Backend.fuzzyFind
203+
--
204+
-- TODO: Consider moving to Unison.Names
205+
--
206+
-- TODO: Take type similarity into account when ranking matches
207+
fuzzyFindByEditDistanceRanked ::
208+
Names.Names ->
209+
Set Name ->
210+
Name ->
211+
[(Int, Name)]
212+
fuzzyFindByEditDistanceRanked globalNames localNames name =
213+
let query =
214+
(Text.unpack . nameToText) name
215+
216+
-- Use 'nameToTextFromLastNSegments' so edit distance is not biased towards shorter fully-qualified names
217+
-- and the name being queried is only partially qualified.
218+
fzfGlobalNames =
219+
Names.queryEditDistances nameToTextFromLastNSegments query globalNames
220+
fzfLocalNames =
221+
Names.queryEditDistances' nameToTextFromLastNSegments query localNames
222+
fzfNames = fzfGlobalNames ++ fzfLocalNames
223+
224+
-- Keep only matches with a sufficiently low edit-distance score
225+
filterByScore = filter (\(score, _, _) -> score < maxScore)
226+
227+
-- Prefer lower edit distances and then prefer shorter names by segment count
228+
rank (score, name, _) = (score, length $ Name.segments name)
229+
230+
-- Remove dupes based on refs
231+
dedupe =
232+
List.nubOrdOn (\(_, _, refs) -> refs)
233+
234+
dropRef = map (\(x, y, _) -> (x, y))
235+
236+
refine =
237+
dropRef . dedupe . sortOn rank . filterByScore
238+
in refine fzfNames
239+
where
240+
nNameSegments = max 1 $ NonEmpty.length $ Name.segments name
241+
242+
takeLast :: Int -> NonEmpty.NonEmpty a -> [a]
243+
takeLast n xs = NonEmpty.drop (NonEmpty.length xs - n) xs
244+
nameFromLastNSegments =
245+
Name.fromSegments
246+
. NonEmpty.fromList
247+
. takeLast nNameSegments
248+
. Name.segments
249+
250+
-- Convert to lowercase for case-insensitive fuzzy matching
251+
nameToText = Text.toLower . Name.toText
252+
nameToTextFromLastNSegments = nameToText . nameFromLastNSegments
253+
254+
ceilingDiv :: Int -> Int -> Int
255+
ceilingDiv x y = (x + 1) `div` y
256+
-- Expect edit distances (number of typos) to be about half the length of the name being queried
257+
-- But clamp max edit distance to work well with very short names
258+
-- and keep ranking reasonably fast when a verbose name is queried
259+
maxScore = clamp (3, 16) $ Text.length (nameToText name) `ceilingDiv` 2
260+
145261
synthesizeFile ::
146262
forall m v.
147263
(Monad m, Var v) =>

0 commit comments

Comments
 (0)