-
Notifications
You must be signed in to change notification settings - Fork 9
Description
Here's what I have so far as a test for spell checking. It uses the FuzzySet library and two dictionaries (one in keyed-dictionary format, the other in FuzzySet format). One day I would like to optimize this with a list of common typos, and to rebuild the FuzzySet library (it is not very complex) so that it can use the main keyed dictionary directly, rather than having to load the dictionary a second time in a special format.
// Remote locations of the three scripts fetched by loadScripts(), keyed by
// the short name that is also used in the progress log messages.
var scriptSrcs = {
    dictionary: "https://523510690b2627b1adb4d84214fd72c16ad36f6a.googledrive.com/host/0B22lFAneNTJbbnNZMnhNN1BhRzg",
    fuzzySet: "https://43ee79d2ad66b846ee83176ae569976401caa824.googledrive.com/host/0B22lFAneNTJbVzl0UWVVSE5Qd2M",
    fuzzySetList: "https://9c9b2aedcfc5e328423a8634b9952476438376d3.googledrive.com/host/0B22lFAneNTJbQk14d2xTRzVsMHM"
};

// Sample sentence that is run through checkWords() once every script has
// loaded. The misspellings are deliberate; do not "fix" them.
var shortTest = "Hey ths iz spelld supr wrng but this isn't. Just to be sure let's spell some more complecx words: acceptence acceptible acceptibly milicous milyew miniscule miniture spicific sporatic squirl.";
/**
 * Downloads the scripts listed in `scriptSrcs` one after another and, once
 * the last one has finished, runs the spell checker over `shortTest` and
 * logs the result.
 *
 * The scripts are requested strictly in sequence (dictionary, fuzzySet,
 * fuzzySetList), exactly as the original nested callbacks did, in case a
 * later script relies on an earlier one. If any download fails, the failure
 * is logged and the remaining scripts are not requested, so spell checking
 * simply never becomes ready.
 */
function loadScripts() {
    var loadOrder = ["dictionary", "fuzzySet", "fuzzySetList"];

    // Requests the script at position `index`, then moves on to the next one
    // from its success callback. Passing the index as an argument gives every
    // step its own `name` binding, which sidesteps the loop-closure problem.
    function loadNext(index) {
        if (index >= loadOrder.length) {
            //spell checking ready
            console.log(checkWords(shortTest));
            return;
        }
        var name = loadOrder[index];
        $.getScript(scriptSrcs[name], function () {
            console.log(name + " finished loading!");
            loadNext(index + 1);
        }).fail(function (jqxhr, settings, exception) {
            // Without this handler a failed download stalls the chain silently.
            console.error("Failed to load " + name + " script: " + exception);
        });
    }

    console.log("Loading scripts...");
    loadNext(0);
}
/**
 * Suggests a corrected spelling for a word.
 *
 * The word is looked up in the global `fuzzySet` object, and the top-ranked
 * candidate is accepted only when its score is at least 0.876. (Per the
 * original author's note, the lookup is a fuzzy match based on Levenshtein
 * distance; the threshold's rationale is not recorded here.)
 *
 * @param {string} match - The word to correct.
 * @returns {string} The replacement word, or `match` itself when there is no
 *     sufficiently confident candidate.
 */
function fixSpelling(match) {
    var result = fuzzySet.get(match);
    // The lookup can come back empty (fuzzyset.js returns null when nothing
    // matches); indexing into it would throw and abort the whole check.
    if (!result || !result.length) {
        return match;
    }
    var score = result[0][0];
    var replacement = result[0][1];
    //if the replacement is very likely accurate, replace it
    if (score >= 0.876) {
        console.log(match + " replaced with " + replacement + ". Accuracy of: " + score);
        return replacement;
    }
    //otherwise, put the original word back with no change
    return match;
}
/**
 * Spell-checks a piece of text and returns it with confident corrections
 * applied.
 *
 * Every run of letters and apostrophes is treated as one word, so
 * contractions such as "isn't" are looked up whole. Each word is lowercased
 * for the lookup in the global `wordList` (presumably supplied by the
 * dictionary script; verify against that file). Words found there, and words
 * `fixSpelling` leaves alone, are returned exactly as they were written, so
 * the text keeps its original capitalisation. A corrected word is returned
 * in lowercase, with a leading capital if the original word had one.
 *
 * @param {string} str - The text to check.
 * @returns {string} The text with misspelled words replaced where possible.
 */
var checkWords = function (str) {
    //replace words only
    return str.replace(/[a-zA-Z']+/g, function (word) {
        // Lowercase only for the lookup; the original token is what we return.
        var lowered = word.toLowerCase();
        if (wordList[lowered]) {
            return word;
        }
        //try to find the right word
        var suggestion = fixSpelling(lowered);
        if (suggestion === lowered) {
            // No confident correction: leave the text untouched.
            return word;
        }
        var first = word.charAt(0);
        if (first !== first.toLowerCase()) {
            // The original began with a capital letter, so keep it capitalised.
            return suggestion.charAt(0).toUpperCase() + suggestion.slice(1);
        }
        return suggestion;
    });
};
// Start fetching the scripts as soon as this file is evaluated; the sample
// spell check runs from the last download's callback.
loadScripts();
See Gist: https://gist.github.com/jt0dd/020cda2085d04b8cdcae
Note that the scripts are loaded asynchronously. In the implementation, I'll have an icon showing how close the dictionary is to being ready; if it's not ready when the user clicks edit (it will be cached on every load after the first), spell checking simply won't take effect for that edit.