User:Inductiveload/Sandbox/long-s replacements

Some regex replacements that can make a substantial dent in long-s corrections.

This uses the fact that a lot of locations in words will never be "f" (e.g. "fhip"), so we can assume it's an OCR error for a long-s.

This list is empirical, and may very occasionally over-correct (e.g. when a very common word is often mistaken, but the correction could rarely break a correct, but uncommon, word), but the hope is that it will still end up with fewer mistakes than before.

    /**
     * Replacement rules for OCR text in which a long-s was misread as "f".
     *
     * Each entry is a two-element array: [pattern, replacement].
     *  - pattern: a RegExp literal. None of the literals carries a flag, so a
     *    plain String.prototype.replace call only rewrites the first match.
     *  - replacement: a replacement string; "$1"/"$2" refer to the pattern's
     *    capture groups.
     *
     * Several rules are written on the assumption that they are applied in
     * list order (see the "do before" comments); keep that in mind when
     * inserting or reordering entries.
     *
     * `var` is kept deliberately so the binding stays visible in the same
     * way to whatever script consumes this list.
     */
    var long_s_reps = [
      [/([^i])fic\b/, "$1sic"],
      [/([Ee])aft/, "$1ast"],
      [/([W])eft/, "$1est"], // assume Weft is West, but weft is like fabric
      // "affocia" needs both letters fixed, so it gets its own rule; a single
      // optional prefix group would keep the first "f" ("afsociate").
      [/affocia/, "associa"], // associate
      [/focia/, "socia"], // social, unsociable
      [/(A|a)nfwer/, "$1nswer"],
      // Same reasoning as above: "effent" must become "essent", not "efsent".
      [/effent/, "essent"], // essential
      [/fent/, "sent"], // sent, sentinel
      [/(other|like)wife/, "$1wise"],
      [/\bfide\b/, "side"],
      [/\bfo\b/, "so"],
      [/\breft/, "rest"],
      [/abfo/, "abso"],
      [/ccef[fs]/, "ccess"],
      [/affer/, "asser"],
      [/affi([fs])/, "assis"], // assis
      [/aff(um|ur)/, "ass$1"], // assume, assure
      [/Afia/, "Asia"],
      [/aftic/, "astic"],
      [/afty/, "asty"],
      [/alfo/, "also"],
      [/apfe/, "apse"],
      [/aufp/, "ausp"],
      [/baffy/, "bassy"],
      [/([Bb])afe/, "$1ase"],
      [/([Bb])eft/, "$1est"],
      [/([Cc])afua/, "$1asua"],
      [/([Cc])auf/, "$1aus"],
      [/([Cc])eaf(?!a)/, "$1eas"],
      [/ceff/, "cess"], // necessary
      [/cefs\b/, "cess"], // princess
      [/Chrif/, "Chris"],
      [/cife/, "cise"],
      [/claf[fs]/, "class"],
      [/clofe/, "close"],
      [/conf(i|t|eq)/, "cons$1"], // const, conseq...
      [/courfe/, "course"],
      [/([Cc])roff\B/, "$1ross"], // cross-
      [/([Cc])rofs\b/, "$1ross"], // cross
      [/defcr/, "descr"],
      [/efer([vt])/, "eser$1"], // deserve-, desert-
      [/([dD])if([ocprgqst]|ad)/, "$1is$2"], // dis-
      [/\b([dD])if([^f]\w)/, "$1is$2"],
      [/diffol/, "dissol"],
      [/defir/, "desir"],
      [/efour/, "esour"],
      [/efpe/, "espe"], // especial
      // $2 restores the suffix (best, bested, besting). When the rules run in
      // list order the plain ([Bb])eft rule above has normally fired already.
      [/([Bb])eft(\b|ed|ing)/, "$1est$2"],
      [/([^kgrdw])eft\b/, "$1est"], // -est
      [/([Ee])fta/, "$1sta"], // establish
      [/([Ee])fti/, "$1sti"], // estimate
      [/enfes/, "enses"],
      [/ennf/, "enns"], // Pennsylv etc
      [/erfal/, "ersal"],
      [/fa(cr|fe|ga|id|le|lut|tis|w\b)/, "sa$1"],
      [/fatif(?!e)/, "satis"],
      [/fca([^s])/, "sca$1"], // scarce, scant, etc (not briefcase)
      [/fchem/, "schem"],
      [/fc(ie|ious|ure|en)/, "sc$1"], // science, conscious, secure
      [/fenf/, "sens"],
      [/fe(a\b|af|cl|co)/, "se$1"], // season, seclude, second
      [/fee(m|n|ing)/, "see$1"], // seen, seem
      [/fe(ek|gr)/, "se$1"],
      [/felec/, "selec"],
      [/fel(f|v)/, "sel$1"],
      [/fenfe/, "sense"],
      [/feri([eo])/, "seri$1"],
      [/fervi/, "servi"],
      [/fettle(m)/, "settle$1"],
      [/fevera/, "severa"],
      [/fing(le|u)/, "sing$1"], // single, singular
      [/fis\b/, "sis"], // -sis
      [/ffidu/, "ssidu"], // Assiduous
      [/fh(al|ut|ip|o)/, "sh$1"],
      [/inifter/, "inister"],
      [/fidera/, "sidera"], // considerable/ation/ate
      [/fift(?!h)/, "sist"], // subsist, consist
      [/fign/, "sign"],
      [/fimi/, "simi"],
      [/fion/, "sion"],
      [/firft/, "first"],
      [/fite\b/, "site"],
      [/fitive/, "sitive"],
      [/fitu/, "situ"],
      [/flowl/, "slowl"],
      [/flowne/, "slowne"],
      [/fm(an|en|all|oth|ooth)/, "sm$1"], // small, helmsmen, smooth
      [/focie/, "socie"],
      [/fole/, "sole"],
      [/foli/, "soli"],
      [/fome/, "some"],
      [/foon/, "soon"],
      [/foph/, "soph"], // -sopher/y
      [/fourc/, "sourc"],
      [/fouth/, "south"], // a long-s is never a capital, so keep lower case
      [/fov/, "sov"],
      [/fpade/, "spade"],
      [/fpawn/, "spawn"],
      [/fpeak/, "speak"],
      [/fpec/, "spec"],
      [/fpee/, "spee"],
      [/fpir/, "spir"], // spirit, spiral
      [/ft(air|an|at|eem|ep|ill|on|oo|r|ud|y)/, "st$1"],
      [/\bft(u)/, "st$1"],
      [/fubf/, "subs"], // do before fub
      [/fub/, "sub"],
      [/fucc/, "succ"],
      [/fuch/, "such"],
      [/fuf(p)/, "sus$1"],
      [/fuff/, "suff"],
      [/fund(?!rais)/, "sund"],
      [/fumm/, "summ"], // summit, summary
      [/fuit/, "suit"],
      [/fuper/, "super"],
      [/fupp/, "supp"],
      [/fure/, "sure"],
      [/furv/, "surv"],
      [/fway/, "sway"],
      [/fyf/, "sys"],
      [/fym/, "sym"],
      [/grefs/, "gress"],
      [/hift/, "hist"],
      [/ifh/, "ish"],
      [/ifm\b/, "ism"],
      [/ifon/, "ison"],
      [/iftic/, "istic"],
      [/illuf/, "illus"],
      [/(I|i)nft/, "$1nst"],
      [/Jefus/, "Jesus"],
      [/([Jj])uft/, "$1ust"],
      [/([Ll])aft/, "$1ast"], // last, lastly, etc
      [/lefia/, "lesia"],
      [/([^ie])efs/, "$1ess"], // -ess
      [/leff/, "less"], // -ess-
      [/lifh/, "lish"],
      [/([MmPp])afs\b/, "$1ass"],
      // NOTE(review): the replacement re-emits exactly what was matched, so
      // this rule currently changes nothing. Presumably "mis-"/"miss" was
      // intended; enabling it would also hit words such as "semifinal" and
      // would have to run after the Mississippi rules below - confirm first.
      [/([Mm])i(f\B|fs\b)/, "$1i$2"],
      [/Miffifippi/, "Missisippi"],
      [/Miffiffippi/, "Mississippi"],
      [/([Mm])oft/, "$1ost"], // keep the captured capitalisation
      [/([Mm])uft/, "$1ust"], // keep the captured capitalisation
      [/nefe/, "nese"],
      [/nefs/, "ness"],
      [/nfate/, "nsate"],
      [/nfive/, "nsive"],
      [/oaft/, "oast"], // coast, etc
      [/obf/, "obs"],
      [/obfe/, "obse"], // observ
      [/ofed/, "osed"],
      [/offefs/, "ossess"],
      [/poffi/, "possi"], // possible; anchored on "p" so "office" is untouched
      [/ofition/, "osition"], // position, etc
      [/ofity/, "osity"],
      [/ouf\b/, "ous"],
      [/oufly/, "ously"],
      [/([Pp])aft/, "$1ast"],
      [/erfon/, "erson"],
      [/erfua/, "ersua"],
      [/erfue/, "ersue"],
      [/erfui/, "ersui"],
      [/eruf/, "erus"],
      [/hraf/, "hras"], // phrase
      [/paff/, "pass"], // pass/age, for pafs, see mafs
      [/([Pp])leaf/, "$1leas"],
      [/([Pp])of(e|t)/, "$1os$2"], // post, pose, compose...
      [/ref[fs]/, "ress"], // dress, dressed, express
      [/refen/, "resen"],
      [/\b([Aa]r|[Rr])ifi/, "$1isi"], // a/rising
      [/rofef([sf])/, "rofess"],
      [/rofp/, "rosp"],
      [/urpof/, "urpos"],
      [/queft/, "quest"],
      [/reafo/, "reaso"],
      [/refea/, "resea"],
      [/refi/, "resi"],
      [/([Tt])afte/, "$1aste"],
      [/terfect/, "tersect"], // intersect, but not perfect, etc
      [/hefe/, "hese"], // these
      [/hofe/, "hose"], // those, whose
      [/traft/, "trast"],
      [/ranf/, "rans"], // trans-
      [/ufe/, "use"],
      [/vaft/, "vast"],
      [/([Vv])eff/, "$1ess"], // vessel; keep the captured capitalisation
      [/verf([eyo])/, "vers$1"], //verse, verso -versy
      [/([Vv])ifi/, "$1isi"],
      [/ifdom/, "isdom"],
      [/xift/, "xist"],
    ];